feat: support indexing other files & increase desktop window size
Signed-off-by: zu1k <i@zu1k.com>
pull/34/head
parent
6b0a4d5bf0
commit
e3c4964ff2
2
Makefile
2
Makefile
|
@ -12,7 +12,7 @@ frontend_preinstall:
|
|||
frontend:
|
||||
pnpm -C frontend run build
|
||||
|
||||
build:
|
||||
build: frontend
|
||||
ifeq (${TARGET}, release)
|
||||
cargo build -p zlib-searcher --release
|
||||
else
|
||||
|
|
|
@ -93,6 +93,8 @@ Download `zlib_index_books.csv.zip` and `libgen_index_books.csv.zip` and extract
|
|||
|
||||
Then run `zlib-searcher index`. You may need to `rm index/*` first.
|
||||
|
||||
If you have other CSV files, you can run `zlib-searcher index -f *.csv` to index them.
|
||||
|
||||
The final folder structure should look like this:
|
||||
|
||||
```
|
||||
|
|
|
@ -4,6 +4,7 @@ use log::info;
|
|||
use std::{
|
||||
fs::File,
|
||||
io::{BufRead, BufReader},
|
||||
path::Path,
|
||||
};
|
||||
use sysinfo::{System, SystemExt};
|
||||
use tantivy::doc;
|
||||
|
@ -33,56 +34,52 @@ fn get_memory_arena_num_bytes() -> usize {
|
|||
}
|
||||
|
||||
impl Searcher {
|
||||
pub fn index(&mut self) {
|
||||
pub fn index(&mut self, csv_file: impl AsRef<Path>) {
|
||||
let mut writer = self.index.writer(get_memory_arena_num_bytes()).unwrap();
|
||||
|
||||
let mut do_index = move |csv_file: &str| {
|
||||
let file = File::open(csv_file).unwrap();
|
||||
let reader = BufReader::new(file);
|
||||
let file = File::open(&csv_file).unwrap();
|
||||
let reader = BufReader::new(file);
|
||||
|
||||
let mut rdr = csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.from_reader(reader);
|
||||
let mut rdr = csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.from_reader(reader);
|
||||
|
||||
let line_count = BufReader::new(File::open(csv_file).unwrap())
|
||||
.lines()
|
||||
.count();
|
||||
let style = ProgressStyle::default_bar()
|
||||
.template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}")
|
||||
.unwrap();
|
||||
let bar = ProgressBar::new(line_count as u64)
|
||||
.with_message(format!("Indexing {}", csv_file))
|
||||
.with_style(style);
|
||||
for result in rdr.deserialize::<Book>().progress_with(bar) {
|
||||
match result {
|
||||
Ok(item) => {
|
||||
if let Err(err) = writer.add_document(doc!(
|
||||
self.id => item.id,
|
||||
self.title => item.title,
|
||||
self.author => item.author,
|
||||
self.publisher => item.publisher,
|
||||
self.extension => item.extension,
|
||||
self.filesize => item.filesize,
|
||||
self.language => item.language,
|
||||
self.year => item.year,
|
||||
self.pages => item.pages,
|
||||
self.isbn => item.isbn,
|
||||
self.ipfs_cid => item.ipfs_cid,
|
||||
)) {
|
||||
println!("{err}");
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let line_count = BufReader::new(File::open(&csv_file).unwrap())
|
||||
.lines()
|
||||
.count();
|
||||
let style = ProgressStyle::default_bar()
|
||||
.template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}")
|
||||
.unwrap();
|
||||
let bar = ProgressBar::new(line_count as u64)
|
||||
.with_message(format!("Indexing {}", csv_file.as_ref().to_str().unwrap()))
|
||||
.with_style(style);
|
||||
for result in rdr.deserialize::<Book>().progress_with(bar) {
|
||||
match result {
|
||||
Ok(item) => {
|
||||
if let Err(err) = writer.add_document(doc!(
|
||||
self.id => item.id,
|
||||
self.title => item.title,
|
||||
self.author => item.author,
|
||||
self.publisher => item.publisher,
|
||||
self.extension => item.extension,
|
||||
self.filesize => item.filesize,
|
||||
self.language => item.language,
|
||||
self.year => item.year,
|
||||
self.pages => item.pages,
|
||||
self.isbn => item.isbn,
|
||||
self.ipfs_cid => item.ipfs_cid,
|
||||
)) {
|
||||
println!("{err}");
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
println!("{err}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer.commit().unwrap();
|
||||
};
|
||||
|
||||
do_index("zlib_index_books.csv");
|
||||
do_index("libgen_index_books.csv");
|
||||
writer.commit().unwrap();
|
||||
writer.wait_merging_threads().expect("merge complete");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -8,12 +8,7 @@ impl Searcher {
|
|||
|
||||
let mut query_parser = QueryParser::for_index(
|
||||
&self.index,
|
||||
vec![
|
||||
self.title,
|
||||
self.author,
|
||||
self.publisher,
|
||||
self.isbn,
|
||||
],
|
||||
vec![self.title, self.author, self.publisher, self.isbn],
|
||||
);
|
||||
query_parser.set_conjunction_by_default();
|
||||
let query = query_parser.parse_query(query).unwrap();
|
||||
|
|
|
@ -55,10 +55,10 @@
|
|||
"windows": [
|
||||
{
|
||||
"fullscreen": false,
|
||||
"height": 800,
|
||||
"height": 900,
|
||||
"resizable": true,
|
||||
"title": "zLib Searcher",
|
||||
"width": 1200
|
||||
"width": 1500
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ use actix_web_static_files::ResourceFiles;
|
|||
use clap::Parser;
|
||||
use log::{info, LevelFilter};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use std::{path::PathBuf, sync::Arc};
|
||||
use zlib_searcher_core::{Book, Searcher};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/generated.rs"));
|
||||
|
@ -62,7 +62,7 @@ enum SubCommand {
|
|||
/// run search webserver
|
||||
Run(Run),
|
||||
/// index the raw data
|
||||
Index,
|
||||
Index(Index),
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
|
@ -76,13 +76,19 @@ struct Run {
|
|||
bind: String,
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
struct Index {
|
||||
#[clap(short, long, num_args=1.., help = "specify csv file to be indexed")]
|
||||
file: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
env_logger::builder().filter_level(LevelFilter::Info).init();
|
||||
|
||||
let args = AppOpts::parse();
|
||||
match args.subcmd {
|
||||
SubCommand::Run(opts) => run(opts).unwrap(),
|
||||
SubCommand::Index => index(),
|
||||
SubCommand::Index(opts) => index(opts),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -113,7 +119,7 @@ async fn run(opts: Run) -> std::io::Result<()> {
|
|||
.await
|
||||
}
|
||||
|
||||
fn index() {
|
||||
fn index(opts: Index) {
|
||||
let index_dir = std::env::current_exe()
|
||||
.unwrap()
|
||||
.parent()
|
||||
|
@ -123,5 +129,12 @@ fn index() {
|
|||
.unwrap()
|
||||
.to_string();
|
||||
let mut searcher = Searcher::new(&index_dir);
|
||||
searcher.index();
|
||||
|
||||
if opts.file.is_empty() {
|
||||
vec!["zlib_index_books.csv", "libgen_index_books.csv"]
|
||||
.iter()
|
||||
.for_each(|file| searcher.index(file));
|
||||
} else {
|
||||
opts.file.iter().for_each(|file| searcher.index(file));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue