17个版本

0.8.0 2020年11月7日
0.7.5 2020年2月11日
0.7.4 2019年10月1日
0.5.2 2019年2月12日
0.3.0 2018年12月12日

#100 in #html-parser

每月 50次下载
用于 6 个crate (3个直接使用)

MIT 协议

44KB
342

Stable Test Crate version Download License: MIT

unhtml的 derive

目录

derive 目标

struct

基本用法

#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Basic usage: derive `FromHtml` on a struct and build it from an HTML string.
// The struct-level selector `#test` scopes the field selectors below.
#[derive(FromHtml)]
#[html(selector = "#test")]
struct SingleUser {
    // Inner HTML of the `<p>` that is the first child of its parent.
    #[html(selector = "p:nth-child(1)", attr = "inner")]
    name: String,

    // Inner HTML of the second `<p>`, converted to `u8`.
    #[html(selector = "p:nth-child(2)", attr = "inner")]
    age: u8,

    // Inner HTML of the third `<p>`, converted to `bool`.
    #[html(selector = "p:nth-child(3)", attr = "inner")]
    like_lemon: bool,
}

// Parse a complete document; `unwrap` panics if any field cannot be extracted.
let user = SingleUser::from_html(r#"<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <div id="test">
        <div>
            <p>Hexilee</p>
            <p>20</p>
            <p>true</p>
        </div>
    </div>
</body>
</html>"#).unwrap();
// Each field holds the content of the matching `<p>` element.
assert_eq!("Hexilee", &user.name);
assert_eq!(20, user.age);
assert!(user.like_lemon);

属性

html

target

derive target 或 field

specification

#[html(selector= "...",attr= "...",default= ...)]

selector、attr、default 乃至 html 属性本身都可以省略。

这是有效的

#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};


// A derive with no `#[html(...)]` attribute at all is still valid:
// `selector`, `attr` and `default` are all optional.
#[derive(FromHtml)]
struct SingleString {
    // No attribute here, so the macro's default selector/attr behavior applies.
    value: String,
}

selector

target

derive target 或 field

字面类型

字符串

specification

选择器必须是有效的 CSS 选择器,无效的选择器将导致编译时 panic

// panic
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// `<>` is not a valid CSS selector, so this derive panics at compile time.
#[derive(FromHtml)]
#[html(selector = "<>")]
struct SingleUser {}
// panic
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::*;

// An invalid selector on a field is rejected at compile time as well.
#[derive(FromHtml)]
struct SingleUser {
    // `<>` is not a valid CSS selector.
    #[html(selector = "<>", attr = "inner")]
    name: String,
}

如果选择多个元素且字段类型不是 Vec,则选择第一个


#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// The selector `a` matches two elements below; because the target is not a
// `Vec`, only the first match is used.
#[derive(FromHtml)]
#[html(selector = "a")]
struct Link {
    // Value of the `href` attribute of the selected element.
    #[html(attr = "href")]
    href: String,

    // `inner` refers to the element's inner HTML.
    #[html(attr = "inner")]
    value: String,
}

// Two `<a>` elements are present in the input.
let link = Link::from_html(r#"
<a href="https://github.com">Github</a> 
<a href="https://google.com">Google</a> 
"#).unwrap();
// The values come from the first `<a>` element.
assert_eq!("https://github.com", &link.href);
assert_eq!("Github", &link.value);
默认行为

根元素的 HTML


#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// The fields carry no selector of their own; they read from the element
// chosen by the struct-level selector `a`.
#[derive(FromHtml)]
#[html(selector = "a")]
struct Link {
    // Value of the `href` attribute.
    #[html(attr = "href")]
    href: String,

    // Inner HTML of the element.
    #[html(attr = "inner")]
    value: String,
}

// Parse a single anchor element.
let link = Link::from_html(r#"<a href="https://github.com">Github</a>"#).unwrap();
assert_eq!("https://github.com", &link.href);
assert_eq!("Github", &link.value);

attr

target

字段

字面类型

字符串

specification
  • inner 指的是 innerHtml
  • 任何其他 attr 指的是 html 元素属性
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Demonstrates `attr`: `inner` means inner HTML, any other name is an
// HTML element attribute.
#[derive(FromHtml)]
#[html(selector = "a")]
struct Link {
    // Ordinary element attribute.
    #[html(attr = "href")]
    href: String,

    // Special name `inner`: the element's inner HTML.
    #[html(attr = "inner")]
    value: String,
}

// Parse a single anchor element.
let link = Link::from_html(r#"<a href="https://github.com">Github</a>"#).unwrap();
assert_eq!("https://github.com", &link.href);
assert_eq!("Github", &link.value);
默认行为

整个元素的 HTML(不是 innerHtml!)

#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Demonstrates the default when `attr` is omitted: the field receives the
// HTML of the whole element (not just its inner HTML).
#[derive(FromHtml)]
#[html(selector = "a")]
struct Link {
    // Value of the `href` attribute.
    #[html(attr = "href")]
    href: String,

    // Inner HTML of the element.
    #[html(attr = "inner")]
    value: String,
    
    // No `attr`: receives the full HTML of the selected element.
    source: String,
}

// Parse a single anchor element.
let link = Link::from_html(r#"<a href="https://github.com">Github</a>"#).unwrap();
assert_eq!("https://github.com", &link.href);
assert_eq!("Github", &link.value);
// `source` equals the complete `<a>...</a>` markup.
assert_eq!(r#"<a href="https://github.com">Github</a>"#, &link.source);

default

target

字段

字面类型

任何 literal 类型

specification
  • 与 field 类型相同的字面量
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Demonstrates `default` given as a literal of the same type as the field.
#[derive(FromHtml)]
struct DefaultUser {
    // invoke String::from_html
    // `#non-exist` matches nothing in the input below, so the default applies.
    #[html(selector = "#non-exist", default = "Hexilee")]
    name: String,

    // invoke u8::from<u8>
    #[html(default = 20)]
    age: u8,

    // Boolean literal default.
    #[html(default = true)]
    like_lemon: bool,
}

// The input contains none of the expected data, so every field falls back
// to its declared default.
let user = DefaultUser::from_html("<p></p>").unwrap();
assert_eq!("Hexilee", &user.name);
assert_eq!(20, user.age);
assert!(user.like_lemon);
  • 字符串
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Target type for the string default on `Website::link` below.
#[derive(FromHtml)]
#[html(selector = "a")]
struct Link {
    // Value of the `href` attribute.
    #[html(attr = "href")]
    href: String,

    // Inner HTML of the element.
    #[html(attr = "inner")]
    value: String,
}

// Demonstrates `default` given as a string literal for non-string fields.
#[derive(FromHtml)]
struct Website {
    // String default for a `u8` field; the assertion below shows it becomes 10.
    #[html(default = "10")]
    age: u8,

    // String default holding HTML; the assertions below show it is parsed into a `Link`.
    #[html(default = "<a href='https://github.com'>Github</a>")]
    link: Link,
}

// The input has no usable data, so both defaults are used.
let website = Website::from_html("<p></p>").unwrap();
let link = website.link;
assert_eq!(10u8, website.age);
assert_eq!("https://github.com", &link.href);
assert_eq!("Github", &link.value);
默认行为

当选择无内容时返回 Err(unhtml::failure::Error)

// panic
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Without a `default`, a field that selects nothing makes `from_html`
// return `Err`.
#[derive(FromHtml)]
#[html(selector = "a")]
struct Link {
    // no default
    #[html(attr = "href")]
    href: String,

    #[html(attr = "inner")]
    value: String,
}

// The `<a>` element has no `href` attribute, so `from_html` returns `Err`
// and `unwrap` panics.
let link = Link::from_html(r#"<a>Github</a>"#).unwrap();

字段类型

实现 FromHtml 的任何类型,不使用泛型
// panic
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Listed as a failing example: field types must implement `FromHtml`.
// NOTE(review): presumably `&str` is rejected because it is a borrowed type
// with no `FromHtml` impl — confirm against the crate.
#[derive(FromHtml)]
#[html(selector = "a")]
struct Link {
    // no default
    #[html(attr = "href")]
    href: &str,
}
// panic
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Listed as a failing example: field types must implement `FromHtml` and
// must not use generics.
// NOTE(review): presumably `LinkedList<String>` is rejected as a generic
// type other than `Vec` — confirm against the crate.
#[derive(FromHtml)]
struct Website {
    // no default
    #[html(attr = "href")]
    hrefs: std::collections::LinkedList<String>,
}
// panic
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Listed as a failing example: field types must implement `FromHtml`.
// NOTE(review): `[String]` is an unsized slice type, which presumably cannot
// be a derive field here — confirm against the crate.
#[derive(FromHtml)]
struct Website {
    // no default
    #[html(attr = "href")]
    hrefs: [String],
}
Vec

应使用 use unhtml::VecFromHtml

// `#[macro_use]` is required so the `FromHtml` derive macro exported by
// `unhtml_derive` is in scope for the `#[derive(FromHtml)]` lines below.
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
// `VecFromHtml` must be imported for the `Vec<TestUser>` field.
use unhtml::{self, FromHtml, VecFromHtml};

// One user, read from three consecutive `<p>` children.
#[derive(FromHtml)]
struct TestUser {
    // Inner HTML of the first `<p>` child.
    #[html(selector = "p:nth-child(1)", attr = "inner")]
    name: String,

    // Inner HTML of the second `<p>` child, converted to `u8`.
    #[html(selector = "p:nth-child(2)", attr = "inner")]
    age: u8,

    // Inner HTML of the third `<p>` child, converted to `bool`.
    #[html(selector = "p:nth-child(3)", attr = "inner")]
    like_lemon: bool,
}

// Collection of users found inside the element with id `test`.
#[derive(FromHtml)]
#[html(selector = "#test")]
struct TestUsers {
    // Every `div` matched under `#test` yields one `TestUser`.
    #[html(selector = "div")]
    users: Vec<TestUser>,
}

// Parse a document whose `#test` element contains two user `<div>` blocks.
let users = TestUsers::from_html(r#"<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <div id="test">
        <div>
            <p>Hexilee</p>
            <p>20</p>
            <p>true</p>
        </div>
        <div>
            <p>BigBrother</p>
            <p>21</p>
            <p>false</p>
        </div>
    </div>
</body>
</html>"#).unwrap();
// Users appear in document order.
let hexilee = &users.users[0];
let big_brother = &users.users[1];
assert_eq!("Hexilee", &hexilee.name);
assert_eq!(20, hexilee.age);
assert!(hexilee.like_lemon);
assert_eq!("BigBrother", &big_brother.name);
assert_eq!(21, big_brother.age);
assert!(!big_brother.like_lemon);

正如 crate unhtml 的文档所述,如果您想直接得到 Vec<TestUser>,可以这样做

// `#[macro_use]` is required so the `FromHtml` derive macro exported by
// `unhtml_derive` is in scope for the `#[derive(FromHtml)]` line below.
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
// `VecFromHtml` provides `Vec::<T>::from_html(selector, html)`.
use unhtml::{self, VecFromHtml};

// One user, read from three consecutive `<p>` children.
#[derive(FromHtml)]
struct TestUser {
    // Inner HTML of the first `<p>` child.
    #[html(selector = "p:nth-child(1)", attr = "inner")]
    name: String,

    // Inner HTML of the second `<p>` child, converted to `u8`.
    #[html(selector = "p:nth-child(2)", attr = "inner")]
    age: u8,

    // Inner HTML of the third `<p>` child, converted to `bool`.
    #[html(selector = "p:nth-child(3)", attr = "inner")]
    like_lemon: bool,
}

// Build a `Vec<TestUser>` directly: the first argument is the selector that
// picks the elements, the second is the source HTML.
let users = Vec::<TestUser>::from_html("#test > div", r#"<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <div id="test">
        <div>
            <p>Hexilee</p>
            <p>20</p>
            <p>true</p>
        </div>
        <div>
            <p>BigBrother</p>
            <p>21</p>
            <p>false</p>
        </div>
    </div>
</body>
</html>"#).unwrap();
// Users appear in document order.
let hexilee = &users[0];
let big_brother = &users[1];
assert_eq!("Hexilee", &hexilee.name);
assert_eq!(20, hexilee.age);
assert!(hexilee.like_lemon);
assert_eq!("BigBrother", &big_brother.name);
assert_eq!(21, big_brother.age);
assert!(!big_brother.like_lemon);

源HTML

带有顶级选择器

所有源 HTML 都将解析为片段(fragment)。顶级元素是 html,且没有 DOCTYPE、head、body

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <div id="test">
        <div>
            <p>Hexilee</p>
            <p>20</p>
            <p>true</p>
        </div>
    </div>
</body>
</html>

将解析为

<html lang="en">
    <meta charset="UTF-8">
    <title>Title</title>
    <div id="test">
        <div>
            <p>Hexilee</p>
            <p>20</p>
            <p>true</p>
        </div>
    </div>
</html>

<p>Hexilee</p>

将解析为

<html>
    <p>Hexilee</p>
</html>    

// panic

// `#[macro_use]` is required so the `FromHtml` derive macro exported by
// `unhtml_derive` is in scope for the `#[derive(FromHtml)]` line below.
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// Source HTML is parsed as a fragment, which has no `head` or `body`
// elements, so neither selector below can match.
#[derive(FromHtml)]
struct Document {
    // no default
    #[html(selector = "head")]
    head: String,

    #[html(selector = "body")]
    body: String,
}

// Both fields select nothing and have no default, so `from_html` returns
// `Err` and `unwrap` panics.
let document = Document::from_html(r#"<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <div id="test">
        <div>
            <p>Hexilee</p>
            <p>20</p>
            <p>true</p>
        </div>
    </div>
</body>
</html>"#).unwrap();
没有顶级选择器

当派生结构体没有顶级选择器时,所有源 HTML 将被解析为片段(fragment),且没有 DOCTYPE、html、head、body

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <div id="test">
        <div>
            <p>Hexilee</p>
            <p>20</p>
            <p>true</p>
        </div>
    </div>
</body>
</html>

将解析为

<meta charset="UTF-8">
<title>Title</title>
<div id="test">
   <div>
       <p>Hexilee</p>
       <p>20</p>
       <p>true</p>
   </div>
</div>

<p>Hexilee</p>

将解析为

<p>Hexilee</p>
#[macro_use]
extern crate unhtml_derive;
extern crate unhtml;
use unhtml::{self, FromHtml};

// A struct with a top-level selector, parsed from a bare fragment.
#[derive(FromHtml)]
#[html(selector = "a")]
struct Link {
    // Value of the `href` attribute.
    #[html(attr = "href")]
    href: String,

    // Inner HTML of the element.
    #[html(attr = "inner")]
    value: String,
}

// The input is a single `<a>` element with no surrounding document.
let link = Link::from_html(r#"<a href="https://github.com">Github</a>"#).unwrap();
assert_eq!("https://github.com", &link.href);
assert_eq!("Github", &link.value);

依赖项

~4–9MB
~93K SLoC