黑马程序员技术交流社区

标题: 【广州校区】+【原创】lucene [打印本页]

作者: 余大麻    时间: 2018-12-6 11:04
标题: 【广州校区】+【原创】lucene
引入lucene
数据库搜索
数据分类:A.结构化数据 B.非结构化数据
非结构化数据查询方法:1.顺序扫描法(Serial Scanning) 2.全文检索(Full-text Search)
全文检索使用场景
Lucene实现全文检索的流程:一:创建索引库 二:通过索引库搜索
1.创建文件对象 2.分析文档 3.创建索引 4.创建查询
配置开发环境
lucene下载
使用jar包
创建索引库
Field域实现类

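Field域实现类

| Field类 | 数据类型 | 是否分析 | 是否索引 | 是否存储 | 说明 |
| --- | --- | --- | --- | --- | --- |
| StringField | 字符串 | N | Y | Y或N | eg:订单号 |
| LongField | Long | Y | Y | Y或N | eg:价格 |
| StoredField | 重载方法,支持多类型 | N | N | Y | eg:文件地址 |
| TextField | 字符串或流 | Y | Y | Y或N | 内容 |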
[Java] 纯文本查看 复制代码
public void createIndex() throws Exception {
    "创建IndexWriter"
    Directory directory = FSDirectory.open(new File("C:\\Users\\82545\\Desktop\\index"));
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
    IndexWriter indexWriter = new IndexWriter(directory, config);
    "创建Document & Field"
    File dir = new File("C:\\Users\\82545\\Desktop\\j2ee\\searchsourceFileDir");
    File[] files = dir.listFiles();
    for (File file : files) {
        String fileName = file.getName();
        long fileSize = file.length();
        String fileContent = FileUtils.readFileToString(file);
        String filePath = file.getPath();
        "-----------------"
        Field file_name = new TextField("file_name", fileName, Store.YES);
        Field file_content = new TextField("file_content", fileContent, Store.YES);
        Field file_path = new StoredField("file_path", filePath);
        Field file_size = new LongField("file_size", fileSize, Store.YES);
        "-----------------"
        Document document = new Document();
        document.add(file_name);
        document.add(file_content);
        document.add(file_path);
        document.add(file_size);
        "创建索引,并写入索引库"
        indexWriter.addDocument(document);
    }
    indexWriter.close();
}

查询索引
[Java] 纯文本查看 复制代码
public void search() throws Exception {
    "创建IndexReader & IndexSearcher"
    Directory directory = FSDirectory.open(new File("C:\\Users\\82545\\Desktop\\index"));
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
   
    Query query = new TermQuery(new Term("file_name", "apache"));
    TopDocs topDocs = indexSearcher.search(query, 10);
    System.out.println("查询结果的总条数:"+ topDocs.totalHits);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            System.out.println(document.get("file_name"));
            //System.out.println(document.get("file_content"));
            System.out.println(document.get("file_path"));
            System.out.println(document.get("file_size"));
    }
    indexReader.close();
}


支持中文分析器(Analyzer)
使用方法
索引库的维护
索引库的添加
[Java] 纯文本查看 复制代码
public void addDocument() throws Exception {
    Directory directory = FSDirectory.open(new File("D:\\temp\\0108\\index"));
    IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
    IndexWriter indexWriter = new IndexWriter(directory, config);
    Document document = new Document();
    document.add(new TextField("filename", "新添加的文档", Store.YES));
    document.add(new TextField("content", "新添加的文档的内容", Store.NO));
    document.add(new TextField("content", "新添加的文档的内容第二个content", Store.YES));
    document.add(new TextField("content1", "新添加的文档的内容要能看到", Store.YES));
    indexWriter.addDocument(document);
    indexWriter.close();
}


索引库删除(全部删除)
[Java] 纯文本查看 复制代码
public void deleteAllIndex() throws Exception {
    Directory directory = FSDirectory.open(new File("D:\\temp\\0108\\index"));
    IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
    IndexWriter indexWriter = new IndexWriter(directory, config);
    indexWriter.deleteAll();
    indexWriter.close ();
}
public void deleteAllIndex() throws Exception {
    Directory directory = FSDirectory.open(new File("D:\\temp\\0108\\index"));
    IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new IKAnalyzer());
    IndexWriter indexWriter = new IndexWriter(directory, config);
   
    Query query = new TermQuery(new Term("fileName","apache"));
    indexWriter.deleteDocuments(query)
    indexWriter.close();
   
}




索引库修改(注:上一段代码中的第二个方法即"索引库删除(指定查询条件删除)"的示例)
[Java] 纯文本查看 复制代码
public void updateIndex() throws Exception {
    IndexWriter indexWriter = getIndexWriter();
    Document document = new Document();
    document.add(new TextField("filename", "要更新的文档", Store.YES));
    String text = "2013年11月18日 - Lucene 简介 Lucene 是一个基于 Java的全文信息检索工具包,";
    text += "它不是一个完整的搜索应用程序,而是为你的应用程序提供索引和搜索功能.";
    document.add(new TextField("content", text, Store.YES));
    indexWriter.updateDocument(new Term("content", "java"), document);
    indexWriter.close();
}

Lucene索引库查询(2种方式)(重点)
一: 使用Query的子类查询
[Java] 纯文本查看 复制代码
public void testMatchAllDocsQuery() throws Exception {
    Directory directory = FSDirectory.open(new File("C:\\Users\\82545\\Desktop\\index"));
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
   
    Query query = new MatchAllDocsQuery();
   
    TopDocs topDocs = indexSearcher.search(query, 10);
    System.out.println("查询结果的总条数:"+ topDocs.totalHits);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            System.out.println(document.get("file_name"));
            //System.out.println(document.get("file_content"));
            System.out.println(document.get("file_path"));
            System.out.println(document.get("file_size"));
    }
    indexReader.close();
}


TermQuery(精准查询)
[Java] 纯文本查看 复制代码
public void testTermQuery() throws Exception {
    IndexSearcher indexSearcher = getIndexSearcher();
    Query query = new TermQuery(new Term("file_content", "lucene"));
    TopDocs topDocs = indexSearcher.search(query, 10);
    System.out.println("查询结果总数量:" + topDocs.totalHits);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        Document document = indexSearcher.doc(scoreDoc.doc);
        System.out.println(document.get("file_name"));
        //System.out.println(document.get("file_content"));
        System.out.println(document.get("file_path"));
        System.out.println(document.get("file_size"));
    }
    indexSearcher.getIndexReader().close();
}


NumericRangeQuery(按区间查)
[Java] 纯文本查看 复制代码
public void testNumericRangeQuery() throws Exception {
    Directory directory = FSDirectory.open(new File("C:\\Users\\82545\\Desktop\\index"));
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
   
    Query query = NumericRangeQuery.newLongRange("size", 1l, 1000l, true, true);
    printResult(query, indexSearcher);
}


BooleanQuery(组合查询)
[Java] 纯文本查看 复制代码
public void testBooleanQuery() throws Exception {
    IndexSearcher indexSearcher = getIndexSearcher();
   
    BooleanQuery query = new BooleanQuery();
    Query query1 = new TermQuery(new Term("filename", "apache"));
    Query query2 = new TermQuery(new Term("content", "apache"));
    query.add(query1, Occur.MUST);//and
    query.add(query2, Occur.MUST);//and
   
    printResult(query, indexSearcher);
}


二: 使用queryparser查询(用到分析器)
[Java] 纯文本查看 复制代码
public void testQueryParser() throws Exception {
    IndexSearcher indexSearcher = getIndexSearcher();
   
    QueryParser queryParser = new QueryParser("content", new IKAnalyzer());//默认的域,分词器
    //Query query = queryParser.parse("fileName:apache");
    Query query = queryParser.parse("Lucene是java开发的");
   
    printResult(query, indexSearcher);
}













欢迎光临 黑马程序员技术交流社区 (http://bbs.itheima.com/) 黑马程序员IT技术论坛 X3.2