cadmus_core/document/html/
dom.rs1use fxhash::{FxHashMap, FxHashSet};
2use std::num::NonZeroUsize;
3
4pub type Attributes = FxHashMap<String, String>;
5pub const WRAPPER_TAG_NAME: &str = "anonymous";
6
7#[derive(Debug, Clone)]
8pub enum NodeData {
9 Root,
10 Wrapper(usize),
11 Element(ElementData),
12 Text(TextData),
13 Whitespace(TextData),
14}
15
16#[derive(Debug, Clone)]
17pub struct ElementData {
18 pub offset: usize,
19 pub name: String,
20 pub qualified_name: Option<String>,
21 pub attributes: Attributes,
22 pub force_block: bool,
25}
26
27impl ElementData {
28 fn is_block(&self) -> bool {
29 self.force_block
30 || matches!(
31 self.name.as_str(),
32 "address"
33 | "article"
34 | "aside"
35 | "blockquote"
36 | "body"
37 | "head"
38 | "details"
39 | "dialog"
40 | "dd"
41 | "div"
42 | "dl"
43 | "dt"
44 | "fieldset"
45 | "figcaption"
46 | "figure"
47 | "footer"
48 | "form"
49 | "h1"
50 | "h2"
51 | "h3"
52 | "h4"
53 | "h5"
54 | "h6"
55 | "header"
56 | "hgroup"
57 | "hr"
58 | "html"
59 | "li"
60 | "main"
61 | "nav"
62 | "ol"
63 | "p"
64 | "pre"
65 | "section"
66 | "table"
67 | "thead"
68 | "colgroup"
69 | "tbody"
70 | "tfoot"
71 | "tr"
72 | "caption"
73 | "td"
74 | "th"
75 | "ul"
76 )
77 }
78}
79
80impl NodeData {
81 fn text(&self) -> Option<&str> {
82 match *self {
83 NodeData::Text(TextData { ref text, .. })
84 | NodeData::Whitespace(TextData { ref text, .. }) => Some(text),
85 _ => None,
86 }
87 }
88
89 fn offset(&self) -> usize {
90 match *self {
91 NodeData::Text(TextData { offset, .. })
92 | NodeData::Whitespace(TextData { offset, .. })
93 | NodeData::Element(ElementData { offset, .. }) => offset,
94 NodeData::Wrapper(offset) => offset,
95 NodeData::Root => 0,
96 }
97 }
98}
99
100#[derive(Debug, Clone)]
101pub struct TextData {
102 pub offset: usize,
103 pub text: String,
104}
105
106pub fn element(name: &str, offset: usize, attributes: Attributes) -> NodeData {
107 let colon = name.find(':');
108 NodeData::Element(ElementData {
109 offset,
110 name: name[colon.map(|index| index + 1).unwrap_or(0)..].to_string(),
111 qualified_name: colon.map(|_| name.to_string()),
112 attributes,
113 force_block: false,
114 })
115}
116
117pub fn text(text: &str, offset: usize) -> NodeData {
118 NodeData::Text(TextData {
119 offset,
120 text: text.to_string(),
121 })
122}
123
124pub fn whitespace(text: &str, offset: usize) -> NodeData {
125 NodeData::Whitespace(TextData {
126 offset,
127 text: text.to_string(),
128 })
129}
130
131#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
132pub struct NodeId(NonZeroUsize);
133
134impl NodeId {
135 pub fn from_index(n: usize) -> Self {
136 NodeId(unsafe { NonZeroUsize::new_unchecked(n + 1) })
137 }
138
139 pub fn to_index(self) -> usize {
140 self.0.get() - 1
141 }
142}
143
144#[derive(Debug, Clone)]
145pub struct XmlTree {
146 nodes: Vec<Node>,
147}
148
149#[derive(Debug, Clone)]
150pub struct Node {
151 data: NodeData,
152 parent: Option<NodeId>,
153 previous_sibling: Option<NodeId>,
154 next_sibling: Option<NodeId>,
155 first_child: Option<NodeId>,
156 last_child: Option<NodeId>,
157}
158
159impl Default for Node {
160 fn default() -> Self {
161 Node {
162 data: NodeData::Root,
163 parent: None,
164 previous_sibling: None,
165 next_sibling: None,
166 first_child: None,
167 last_child: None,
168 }
169 }
170}
171
172#[derive(Debug, Copy, Clone)]
173pub struct NodeRef<'a> {
174 pub id: NodeId,
175 pub node: &'a Node,
176 pub tree: &'a XmlTree,
177}
178
179#[derive(Debug)]
180pub struct NodeMut<'a> {
181 pub id: NodeId,
182 pub tree: &'a mut XmlTree,
183}
184
185impl XmlTree {
186 pub fn new() -> Self {
187 XmlTree {
188 nodes: vec![Node::default()],
189 }
190 }
191
192 fn node(&self, id: NodeId) -> &Node {
193 unsafe { self.nodes.get_unchecked(id.to_index()) }
194 }
195
196 fn node_mut(&mut self, id: NodeId) -> &mut Node {
197 unsafe { self.nodes.get_unchecked_mut(id.to_index()) }
198 }
199
200 pub fn get(&self, id: NodeId) -> NodeRef<'_> {
201 NodeRef {
202 id,
203 node: self.node(id),
204 tree: self,
205 }
206 }
207
208 pub fn get_mut(&mut self, id: NodeId) -> NodeMut<'_> {
209 NodeMut { id, tree: self }
210 }
211
212 pub fn root(&self) -> NodeRef<'_> {
213 self.get(NodeId::from_index(0))
214 }
215
216 pub fn root_mut(&mut self) -> NodeMut<'_> {
217 self.get_mut(NodeId::from_index(0))
218 }
219
220 pub fn push_node(&mut self, data: NodeData) -> NodeId {
221 let id = NodeId::from_index(self.nodes.len());
222 let node = Node {
223 data,
224 parent: None,
225 previous_sibling: None,
226 next_sibling: None,
227 first_child: None,
228 last_child: None,
229 };
230 self.nodes.push(node);
231 id
232 }
233
234 pub fn attach_child(&mut self, parent_id: NodeId, child_id: NodeId) {
235 let last_child = self.node(parent_id).last_child;
236
237 let child = self.node_mut(child_id);
238 child.parent = Some(parent_id);
239 child.previous_sibling = last_child;
240 child.next_sibling = None;
241
242 if let Some(last) = last_child {
243 self.node_mut(last).next_sibling = Some(child_id);
244 }
245
246 self.node_mut(parent_id).last_child = Some(child_id);
247
248 if self.node(parent_id).first_child.is_none() {
249 self.node_mut(parent_id).first_child = Some(child_id);
250 }
251 }
252
253 pub fn insert_before(&mut self, sibling_id: NodeId, new_id: NodeId) {
254 let parent_id = self.node(sibling_id).parent;
255 let prev_sibling = self.node(sibling_id).previous_sibling;
256
257 self.node_mut(new_id).parent = parent_id;
258 self.node_mut(new_id).previous_sibling = prev_sibling;
259 self.node_mut(new_id).next_sibling = Some(sibling_id);
260
261 self.node_mut(sibling_id).previous_sibling = Some(new_id);
262
263 if let Some(prev) = prev_sibling {
264 self.node_mut(prev).next_sibling = Some(new_id);
265 } else if let Some(parent) = parent_id {
266 self.node_mut(parent).first_child = Some(new_id);
267 }
268 }
269
270 pub fn detach(&mut self, id: NodeId) {
271 let node = self.node(id);
272 let parent_id = node.parent;
273 let prev_sibling = node.previous_sibling;
274 let next_sibling = node.next_sibling;
275
276 if let Some(prev) = prev_sibling {
277 self.node_mut(prev).next_sibling = next_sibling;
278 } else if let Some(parent) = parent_id {
279 self.node_mut(parent).first_child = next_sibling;
280 }
281
282 if let Some(next) = next_sibling {
283 self.node_mut(next).previous_sibling = prev_sibling;
284 } else if let Some(parent) = parent_id {
285 self.node_mut(parent).last_child = prev_sibling;
286 }
287
288 self.node_mut(id).parent = None;
289 self.node_mut(id).previous_sibling = None;
290 self.node_mut(id).next_sibling = None;
291 }
292
293 pub fn append_text_to(&mut self, id: NodeId, extra: &str) {
294 match &mut self.node_mut(id).data {
295 NodeData::Text(TextData { text, .. }) | NodeData::Whitespace(TextData { text, .. }) => {
296 text.push_str(extra);
297 }
298 _ => {}
299 }
300 }
301
302 pub fn add_attr_if_missing(&mut self, id: NodeId, name: &str, value: &str) {
303 if let NodeData::Element(ElementData {
304 ref mut attributes, ..
305 }) = self.node_mut(id).data
306 {
307 if !attributes.contains_key(name) {
308 attributes.insert(name.to_string(), value.to_string());
309 }
310 }
311 }
312
313 fn promote_blockish_inlines(&mut self) {
322 let ids: Vec<NodeId> = self
323 .root()
324 .descendants()
325 .filter(|n| {
326 matches!(n.data(), NodeData::Element(..))
327 && n.is_inline()
328 && n.descendants().any(|d| d.is_block())
329 })
330 .map(|n| n.id)
331 .collect();
332
333 for id in ids {
334 if let NodeData::Element(e) = &mut self.node_mut(id).data {
335 e.force_block = true;
336 }
337 }
338 }
339
340 pub fn wrap_lost_inlines(&mut self) {
341 self.promote_blockish_inlines();
342
343 let mut ids = Vec::new();
344 let mut known_ids = FxHashSet::default();
345
346 for n in self.root().descendants().filter(|n| n.is_inline()) {
347 if known_ids.contains(&n.id) {
348 continue;
349 }
350
351 let mut first_id = None;
352 let mut last_id = None;
353
354 for s in n.previous_siblings() {
355 if s.is_block() {
356 first_id = Some(s.next_sibling().unwrap().id);
357 break;
358 } else {
359 known_ids.insert(s.id);
360 }
361 }
362
363 for s in n.next_siblings() {
364 if s.is_block() {
365 last_id = Some(s.previous_sibling().unwrap().id);
366 break;
367 } else {
368 known_ids.insert(s.id);
369 }
370 }
371
372 if first_id.is_some() || last_id.is_some() {
373 let parent = n.parent().unwrap();
374 ids.push([
375 parent.id,
376 first_id.unwrap_or_else(|| parent.node.first_child.unwrap()),
377 last_id.unwrap_or_else(|| parent.node.last_child.unwrap()),
378 ]);
379 }
380 }
381
382 for [parent_id, first_id, last_id] in ids {
383 let offset = self.node(first_id).data.offset();
384 let mut node = self.get_mut(parent_id);
385 node.wrap_range(first_id, last_id, NodeData::Wrapper(offset));
386 }
387 }
388}
389
390impl<'a> NodeRef<'a> {
391 pub fn parent(&self) -> Option<Self> {
392 self.node.parent.map(|id| self.tree.get(id))
393 }
394
395 pub fn parent_element(&self) -> Option<Self> {
396 self.ancestors().find(|n| n.is_element() && !n.is_wrapper())
397 }
398
399 pub fn previous_sibling(&self) -> Option<Self> {
400 self.node.previous_sibling.map(|id| self.tree.get(id))
401 }
402
403 pub fn previous_sibling_element(&self) -> Option<NodeRef<'a>> {
404 self.previous_sibling_elements().next()
405 }
406
407 pub fn next_sibling_element(&self) -> Option<NodeRef<'a>> {
408 self.next_sibling_elements().next()
409 }
410
411 pub fn next_sibling(&self) -> Option<Self> {
412 self.node.next_sibling.map(|id| self.tree.get(id))
413 }
414
415 pub fn first_child(&self) -> Option<Self> {
416 self.node.first_child.map(|id| self.tree.get(id))
417 }
418
419 pub fn last_child(&self) -> Option<Self> {
420 self.node.last_child.map(|id| self.tree.get(id))
421 }
422
423 pub fn ancestors(&self) -> Ancestors<'a> {
424 Ancestors {
425 next: self.parent(),
426 }
427 }
428
429 pub fn ancestor_elements(&self) -> impl Iterator<Item = NodeRef<'a>> {
430 self.ancestors()
431 .filter(|n| n.is_element() && !n.is_wrapper())
432 }
433
434 pub fn previous_siblings(&self) -> PreviousSiblings<'a> {
435 PreviousSiblings {
436 next: self.previous_sibling(),
437 }
438 }
439
440 pub fn next_siblings(&self) -> NextSiblings<'a> {
441 NextSiblings {
442 next: self.next_sibling(),
443 }
444 }
445
446 pub fn previous_sibling_elements(&self) -> impl Iterator<Item = NodeRef<'a>> {
447 self.previous_siblings().filter(|n| n.is_element())
448 }
449
450 pub fn next_sibling_elements(&self) -> impl Iterator<Item = NodeRef<'a>> {
451 self.next_siblings().filter(|n| n.is_element())
452 }
453
454 pub fn children(&self) -> Children<'a> {
455 Children {
456 next: self.first_child(),
457 }
458 }
459
460 pub fn descendants(&self) -> Descendants<'a> {
461 Descendants {
462 root_id: self.id,
463 next: self.first_child(),
464 }
465 }
466
467 pub fn has_children(&self) -> bool {
468 self.node.first_child.is_some()
469 }
470
471 pub fn is_element(&self) -> bool {
472 matches!(
473 self.node.data,
474 NodeData::Element { .. } | NodeData::Wrapper(..) | NodeData::Root
475 )
476 }
477
478 pub fn is_inline(&self) -> bool {
479 match &self.node.data {
480 NodeData::Element(e) => !e.is_block(),
481 NodeData::Text(..) => true,
482 _ => false,
483 }
484 }
485
486 pub fn is_block(&self) -> bool {
487 match &self.node.data {
488 NodeData::Element(e) => e.is_block(),
489 NodeData::Wrapper(..) | NodeData::Root => true,
490 _ => false,
491 }
492 }
493
494 pub fn is_wrapper(&self) -> bool {
495 matches!(self.node.data, NodeData::Wrapper(..))
496 }
497
498 pub fn data(&self) -> &'a NodeData {
499 &self.node.data
500 }
501
502 pub fn offset(&self) -> usize {
503 self.node.data.offset()
504 }
505
506 pub fn text(&self) -> String {
507 self.node.data.text().map(String::from).unwrap_or_else(|| {
508 self.descendants()
509 .filter_map(|n| n.node.data.text())
510 .fold(String::new(), |mut a, b| {
511 a.push_str(b);
512 a
513 })
514 })
515 }
516
517 pub fn tag_name(&self) -> Option<&'a str> {
518 match self.node.data {
519 NodeData::Element(ElementData { ref name, .. }) => Some(name),
520 NodeData::Wrapper(..) => Some(WRAPPER_TAG_NAME),
521 _ => None,
522 }
523 }
524
525 pub fn tag_qualified_name(&self) -> Option<&'a str> {
526 match self.node.data {
527 NodeData::Element(ElementData {
528 ref qualified_name, ..
529 }) => qualified_name.as_deref(),
530 _ => None,
531 }
532 }
533
534 pub fn attributes(&self) -> Option<&'a Attributes> {
535 match self.node.data {
536 NodeData::Element(ElementData { ref attributes, .. }) => Some(attributes),
537 _ => None,
538 }
539 }
540
541 pub fn attribute(&self, name: &str) -> Option<&'a str> {
542 self.attributes()
543 .and_then(|a| a.get(name).map(String::as_str))
544 }
545
546 pub fn classes(&self) -> impl Iterator<Item = &'a str> {
547 self.attribute("class").unwrap_or("").split_whitespace()
548 }
549
550 pub fn id(&self) -> Option<&str> {
551 self.attribute("id")
552 }
553
554 pub fn find(&self, tag_name: &str) -> Option<Self> {
555 self.descendants().find(|n| n.tag_name() == Some(tag_name))
556 }
557
558 pub fn find_by_id(&self, id: &str) -> Option<Self> {
559 self.descendants().find(|n| n.id() == Some(id))
560 }
561}
562
563impl<'a> NodeMut<'a> {
564 fn node(&mut self) -> &mut Node {
565 self.tree.node_mut(self.id)
566 }
567
568 pub fn append(&mut self, data: NodeData) -> NodeId {
569 let id = NodeId::from_index(self.tree.nodes.len());
570
571 let node = Node {
572 data,
573 parent: Some(self.id),
574 previous_sibling: self.node().last_child,
575 next_sibling: None,
576 first_child: None,
577 last_child: None,
578 };
579
580 self.tree.nodes.push(node);
581
582 if let Some(last_child) = self.node().last_child {
583 self.tree.node_mut(last_child).next_sibling = Some(id);
584 }
585
586 self.node().last_child = Some(id);
587
588 if self.node().first_child.is_none() {
589 self.node().first_child = Some(id);
590 }
591
592 id
593 }
594
595 pub fn wrap_range(&mut self, first_id: NodeId, last_id: NodeId, data: NodeData) {
596 let before = self.tree.node(first_id).previous_sibling;
597 let after = self.tree.node(last_id).next_sibling;
598 let id = NodeId::from_index(self.tree.nodes.len());
599
600 let node = Node {
601 data,
602 parent: Some(self.id),
603 previous_sibling: before,
604 next_sibling: after,
605 first_child: Some(first_id),
606 last_child: Some(last_id),
607 };
608
609 self.tree.nodes.push(node);
610
611 if let Some(before_id) = before {
612 self.tree.node_mut(before_id).next_sibling = Some(id);
613 }
614
615 if let Some(after_id) = after {
616 self.tree.node_mut(after_id).previous_sibling = Some(id);
617 }
618
619 if let Some(first_child_id) = self.node().first_child {
620 if first_child_id == first_id {
621 self.node().first_child = Some(id);
622 }
623 }
624
625 if let Some(last_child_id) = self.node().last_child {
626 if last_child_id == last_id {
627 self.node().last_child = Some(id);
628 }
629 }
630
631 self.tree.node_mut(first_id).previous_sibling = None;
632 self.tree.node_mut(last_id).next_sibling = None;
633 self.tree.node_mut(first_id).parent = Some(id);
634
635 let mut node_id = first_id;
636 while let Some(next_id) = self.tree.node(node_id).next_sibling {
637 self.tree.node_mut(next_id).parent = Some(id);
638 node_id = next_id;
639 }
640 }
641}
642
643pub struct Ancestors<'a> {
644 next: Option<NodeRef<'a>>,
645}
646
647pub struct NextSiblings<'a> {
648 next: Option<NodeRef<'a>>,
649}
650
651pub struct PreviousSiblings<'a> {
652 next: Option<NodeRef<'a>>,
653}
654
655pub struct Children<'a> {
656 next: Option<NodeRef<'a>>,
657}
658
659pub struct Descendants<'a> {
660 root_id: NodeId,
661 next: Option<NodeRef<'a>>,
662}
663
664impl<'a> Iterator for Ancestors<'a> {
665 type Item = NodeRef<'a>;
666
667 fn next(&mut self) -> Option<Self::Item> {
668 let node = self.next.take();
669 self.next = node.as_ref().and_then(|node| node.parent());
670 node
671 }
672}
673
674impl<'a> Iterator for PreviousSiblings<'a> {
675 type Item = NodeRef<'a>;
676
677 fn next(&mut self) -> Option<Self::Item> {
678 let node = self.next.take();
679 self.next = node.as_ref().and_then(|node| node.previous_sibling());
680 node
681 }
682}
683
684impl<'a> Iterator for NextSiblings<'a> {
685 type Item = NodeRef<'a>;
686
687 fn next(&mut self) -> Option<Self::Item> {
688 let node = self.next.take();
689 self.next = node.as_ref().and_then(|node| node.next_sibling());
690 node
691 }
692}
693
694impl<'a> Iterator for Children<'a> {
695 type Item = NodeRef<'a>;
696
697 fn next(&mut self) -> Option<Self::Item> {
698 let node = self.next.take();
699 self.next = node.as_ref().and_then(|node| node.next_sibling());
700 node
701 }
702}
703
704impl<'a> Iterator for Descendants<'a> {
705 type Item = NodeRef<'a>;
706
707 fn next(&mut self) -> Option<Self::Item> {
708 let node = self.next.take();
709 if let Some(node) = node {
710 self.next = node
711 .first_child()
712 .or_else(|| node.next_sibling())
713 .or_else(|| {
714 node.ancestors()
715 .take_while(|n| n.id != self.root_id)
716 .find(|n| n.node.next_sibling.is_some())
717 .and_then(|n| n.next_sibling())
718 });
719 }
720 node
721 }
722}