8 个重大版本发布

0.9.0 2020年11月6日
0.7.0 2020年8月28日
0.5.0 2020年7月31日

#8 in #lucene

36 每月下载量

MIT 许可证

2.5MB
2.5K SLoC

JSON-Surf

搜索/分析 JSON 和 Rust 结构

Build Status codecov Version RepoSize Crates.io Crates.io Crates.io Contributors Gitter

功能

  • 全文/术语搜索 (sqlish!!!)
  • 易于读取、写入和删除 API
  • 序列化扁平的 JSON/结构体
  • 一次写入多个文档
  • 支持模糊词搜索(见示例)
  • 无需运行时
  • 无不安全块
  • 在 rust stable 上运行
  • 即将推出:双词建议 & TF-IDF 支持

动机

  • 让您现有的扁平 Rust 结构体可被搜索
  • 一旦 tantivy 支持任意字节流,本包也将随之支持任意字节流(见此处)
  • 这可以仅作为数据库中实际数据的容器,保持索引轻量
  • 创建可被更新/删除的时间感知容器
  • 为请求/响应创建临时存储
  • 这可以与任何 Web 服务器集成以实时索引和搜索
  • 该包将主要关注与用户工作流程相关的问题
  • 底层使用 tantivy

待办事项

  • 添加更多示例
  • 删除任何进一步的复制
  • 引入更多的维护 API(如果需要)

快速入门

先决条件

 [dependencies]
 # "*" accepts any published version; pin a specific one (e.g. "0.9") for reproducible builds
 json-surf = "*"

示例

use std::convert::TryFrom;
use std::hash::{Hash, Hasher};
use std::cmp::{Ord, Ordering, Eq};


use std::fs::remove_dir_all;

use serde::{Serialize, Deserialize};

use json_surf::prelude::*;


/// Main struct: a flat user record that this example indexes and searches.
///
/// The derived `PartialEq` and `PartialOrd` compare the fields in declaration
/// order (`first`, then `last`, then `age`).
/// NOTE(review): `Serialize`/`Deserialize` are presumably required by the
/// json-surf calls in `main` — confirm against the crate's API.
#[derive(Serialize, Debug, Deserialize, PartialEq, PartialOrd, Clone)]
struct UserInfo {
    /// First name.
    first: String,
    /// Last name.
    last: String,
    /// Age, stored as `u8`.
    age: u8,
}

impl UserInfo {
    pub fn new(first: String, last: String, age: u8) -> Self {
        Self {
            first,
            last,
            age,
        }
    }
}

impl Default for UserInfo {
    fn default() -> Self {
        let first = "".to_string();
        let last = "".to_string();
        let age = 0u8;
        UserInfo::new(first, last, age)
    }
}

/// End-to-end walkthrough: register an index, insert two users, query them
/// back with an OR-of-ANDs condition, delete one record, then remove the
/// on-disk index directories.
fn main() {
    // Home location for the indexes, and the name of the index used below
    let home = ".store".to_string();
    let index_name = "usage".to_string();

    // Prepare the builder and register the index with a default `UserInfo`
    let mut builder = SurferBuilder::default();
    builder.set_home(&home);
    builder.add_struct(index_name.clone(), &UserInfo::default());

    // Prepare Surf
    let mut surf = Surf::try_from(builder).unwrap();

    // Records to insert & search
    let john_doe = UserInfo::new("John".to_string(), "Doe".to_string(), 20u8);
    let jane_doe = UserInfo::new("Jane".to_string(), "Doe".to_string(), 18u8);

    // See examples for more options
    let users = vec![john_doe.clone(), jane_doe.clone()];
    let _ = surf.insert(&index_name, &users).unwrap();

    block_thread(1);

    // See examples for more options
    // Similar to SELECT * FROM users WHERE (age = 20 AND last = "Doe") OR (first = "Jane")
    // (age = 20 AND last = "Doe")
    let age_and_last = OrCondition::new(vec![
        AndCondition::new("age".to_string(), "20".to_string()),
        AndCondition::new("last".to_string(), "doe".to_string()),
    ]);
    // (first = "Jane")
    let first_only = OrCondition::new(vec![AndCondition::new(
        "first".to_string(),
        "jane".to_string(),
    )]);
    let conditions = vec![age_and_last, first_only];

    // Both John and Jane must come back; sort both sides so order does not matter
    let mut computed = surf
        .select::<UserInfo>(&index_name, &conditions)
        .unwrap()
        .unwrap();
    let mut expected = vec![john_doe.clone(), jane_doe.clone()];
    expected.sort();
    computed.sort();
    assert_eq!(expected, computed);

    // John's record alone - alternate shortcut for a query on a single field
    let computed: Vec<UserInfo> = surf
        .read_all_structs_by_field(&index_name, "age", "20")
        .unwrap()
        .unwrap();
    assert_eq!(vec![john_doe], computed);

    // Delete John's record
    let result = surf.delete(&index_name, "age", "20");
    assert!(result.is_ok());

    // The same lookup must now come back empty
    let computed: Vec<UserInfo> = surf
        .read_all_structs_by_field(&index_name, "age", "20")
        .unwrap()
        .unwrap();
    assert!(computed.is_empty());

    // Clean-up (best effort: removal errors are deliberately ignored)
    let path = surf.which_index(&index_name).unwrap();
    let _ = remove_dir_all(&path);
    let _ = remove_dir_all(&home);
}

/// Ignore all of this
/// Convenience method for sorting & likely not required in user code
///
/// Total order over `first`, then `last`, then `age`. All three fields take
/// part so that `cmp` returns `Ordering::Equal` only for records that are
/// also `==`, and so that it agrees with the derived `PartialOrd`, as the
/// `Ord` contract requires.
impl Ord for UserInfo {
    fn cmp(&self, other: &Self) -> Ordering {
        self.first
            .cmp(&other.first)
            .then_with(|| self.last.cmp(&other.last))
            // Tie-break on age: records sharing both names are still distinct values.
            .then_with(|| self.age.cmp(&other.age))
    }
}

/// Ignore all of this
/// Convenience method for sorting & likely not required in user code
///
/// Empty marker impl: `Ord` (used by the `sort()` calls in `main`) requires
/// `Eq`; the equality test itself comes from the derived `PartialEq`.
impl Eq for UserInfo {}

/// Ignore all of this
/// Convenience method for sorting & likely not required in user code
///
/// Feeds `first`, `last` and `age` into `state` through each field's own
/// `Hash` impl. Going through `str`'s impl (rather than writing the raw
/// bytes) keeps the boundary between the two names unambiguous, so
/// ("ab", "c") and ("a", "bc") no longer produce the same input stream.
impl Hash for UserInfo {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.first.hash(state);
        self.last.hash(state);
        self.age.hash(state);
        // No `state.finish()` here: producing the digest is the caller's job.
    }
}

依赖项

~19–30MB
~454K SLoC