feat: support indexing other files & increase desktop window size

Signed-off-by: zu1k <i@zu1k.com>
pull/34/head
zu1k 2022-12-02 20:43:16 +08:00
parent 6b0a4d5bf0
commit e3c4964ff2
No known key found for this signature in database
GPG Key ID: AE381A8FB1EF2CC8
6 changed files with 62 additions and 55 deletions

View File

@ -12,7 +12,7 @@ frontend_preinstall:
frontend:
pnpm -C frontend run build
build:
build: frontend
ifeq (${TARGET}, release)
cargo build -p zlib-searcher --release
else

View File

@ -93,6 +93,8 @@ Download `zlib_index_books.csv.zip` and `libgen_index_books.csv.zip` and extract
Then run `zlib-searcher index`. You may need to `rm index/*` first.
If you have other csv files, you can run `zlib-searcher index -f *.csv` to index them.
The final folder structure should look like this:
```

View File

@ -4,6 +4,7 @@ use log::info;
use std::{
fs::File,
io::{BufRead, BufReader},
path::Path,
};
use sysinfo::{System, SystemExt};
use tantivy::doc;
@ -33,56 +34,52 @@ fn get_memory_arena_num_bytes() -> usize {
}
impl Searcher {
pub fn index(&mut self) {
pub fn index(&mut self, csv_file: impl AsRef<Path>) {
let mut writer = self.index.writer(get_memory_arena_num_bytes()).unwrap();
let mut do_index = move |csv_file: &str| {
let file = File::open(csv_file).unwrap();
let reader = BufReader::new(file);
let file = File::open(&csv_file).unwrap();
let reader = BufReader::new(file);
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
.from_reader(reader);
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
.from_reader(reader);
let line_count = BufReader::new(File::open(csv_file).unwrap())
.lines()
.count();
let style = ProgressStyle::default_bar()
.template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}")
.unwrap();
let bar = ProgressBar::new(line_count as u64)
.with_message(format!("Indexing {}", csv_file))
.with_style(style);
for result in rdr.deserialize::<Book>().progress_with(bar) {
match result {
Ok(item) => {
if let Err(err) = writer.add_document(doc!(
self.id => item.id,
self.title => item.title,
self.author => item.author,
self.publisher => item.publisher,
self.extension => item.extension,
self.filesize => item.filesize,
self.language => item.language,
self.year => item.year,
self.pages => item.pages,
self.isbn => item.isbn,
self.ipfs_cid => item.ipfs_cid,
)) {
println!("{err}");
}
}
Err(err) => {
let line_count = BufReader::new(File::open(&csv_file).unwrap())
.lines()
.count();
let style = ProgressStyle::default_bar()
.template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}")
.unwrap();
let bar = ProgressBar::new(line_count as u64)
.with_message(format!("Indexing {}", csv_file.as_ref().to_str().unwrap()))
.with_style(style);
for result in rdr.deserialize::<Book>().progress_with(bar) {
match result {
Ok(item) => {
if let Err(err) = writer.add_document(doc!(
self.id => item.id,
self.title => item.title,
self.author => item.author,
self.publisher => item.publisher,
self.extension => item.extension,
self.filesize => item.filesize,
self.language => item.language,
self.year => item.year,
self.pages => item.pages,
self.isbn => item.isbn,
self.ipfs_cid => item.ipfs_cid,
)) {
println!("{err}");
}
}
Err(err) => {
println!("{err}");
}
}
}
writer.commit().unwrap();
};
do_index("zlib_index_books.csv");
do_index("libgen_index_books.csv");
writer.commit().unwrap();
writer.wait_merging_threads().expect("merge complete");
}
}

View File

@ -8,12 +8,7 @@ impl Searcher {
let mut query_parser = QueryParser::for_index(
&self.index,
vec![
self.title,
self.author,
self.publisher,
self.isbn,
],
vec![self.title, self.author, self.publisher, self.isbn],
);
query_parser.set_conjunction_by_default();
let query = query_parser.parse_query(query).unwrap();

View File

@ -55,10 +55,10 @@
"windows": [
{
"fullscreen": false,
"height": 800,
"height": 900,
"resizable": true,
"title": "zLib Searcher",
"width": 1200
"width": 1500
}
]
}

View File

@ -5,7 +5,7 @@ use actix_web_static_files::ResourceFiles;
use clap::Parser;
use log::{info, LevelFilter};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::{path::PathBuf, sync::Arc};
use zlib_searcher_core::{Book, Searcher};
include!(concat!(env!("OUT_DIR"), "/generated.rs"));
@ -62,7 +62,7 @@ enum SubCommand {
/// run search webserver
Run(Run),
/// index the raw data
Index,
Index(Index),
}
#[derive(Parser)]
@ -76,13 +76,19 @@ struct Run {
bind: String,
}
// Command-line options for the `index` subcommand (`SubCommand::Index`).
#[derive(Parser)]
struct Index {
// CSV files to index, given as `-f` / `--file` followed by one or more
// paths (`num_args=1..`). When no file is supplied, `index()` falls back
// to `zlib_index_books.csv` and `libgen_index_books.csv`.
#[clap(short, long, num_args=1.., help = "specify csv file to be indexed")]
file: Vec<PathBuf>,
}
fn main() {
env_logger::builder().filter_level(LevelFilter::Info).init();
let args = AppOpts::parse();
match args.subcmd {
SubCommand::Run(opts) => run(opts).unwrap(),
SubCommand::Index => index(),
SubCommand::Index(opts) => index(opts),
}
}
@ -113,7 +119,7 @@ async fn run(opts: Run) -> std::io::Result<()> {
.await
}
fn index() {
fn index(opts: Index) {
let index_dir = std::env::current_exe()
.unwrap()
.parent()
@ -123,5 +129,12 @@ fn index() {
.unwrap()
.to_string();
let mut searcher = Searcher::new(&index_dir);
searcher.index();
if opts.file.is_empty() {
vec!["zlib_index_books.csv", "libgen_index_books.csv"]
.iter()
.for_each(|file| searcher.index(file));
} else {
opts.file.iter().for_each(|file| searcher.index(file));
}
}