ElasticSearch学习3 - SpringBoot

x33g5p2x  于2022-05-05 转载在 ElasticSearch  
字(19.1k)|赞(0)|评价(0)|浏览(546)

概述

文档: https://www.elastic.co/guide/en/elasticsearch/client/index.html

SpringBoot

引入依赖
<properties>
    <java.version>1.8</java.version>
    <spring-native.version>0.10.0-SNAPSHOT</spring-native.version>

    <!-- Must match the version of the Elasticsearch server you are running. -->
    <!-- NOTE(review): presumably overrides the client version managed by the Spring Boot parent POM; confirm the parent is in use. -->
    <elasticsearch.version>7.6.1</elasticsearch.version>
</properties>        

<dependencies>
    <!-- Spring Data Elasticsearch starter: https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-data-elasticsearch -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
</dependencies>
配置 ES 客户端(可选:也可以直接使用 Spring Boot 提供的默认客户端,无需手动配置)
/**
 * Spring configuration that registers the Elasticsearch high-level REST client
 * used by the rest of the application.
 */
@Configuration
@EnableConfigurationProperties(value = {MyConfig.class})
public class SpringConfig {

    /**
     * Creates the primary {@link RestHighLevelClient} bean, pointed at a single
     * Elasticsearch node on {@code localhost:9200}.
     *
     * @return the high-level REST client
     */
    @Bean("restHighLevelClient")
    @Primary
    public RestHighLevelClient restHighLevelClient() {
        HttpHost esNode = new HttpHost("localhost", 9200);
        return new RestHighLevelClient(RestClient.builder(esNode));
    }

}
创建索引、删除索引、判断索引是否存在(索引相当于关系数据库中的表)
package top.linruchang.springbooottest.controller;

import cn.hutool.core.lang.Console;
import lombok.SneakyThrows;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import top.linruchang.springbooottest.service.ArticleService;

import java.io.IOException;
import java.util.stream.Stream;

/**
 * Index-level examples for the Elasticsearch high-level REST client:
 * create an index, check whether an index exists, and delete an index.
 * Every test talks to the cluster behind the injected client.
 *
 * @author LinRuChang
 * @version 1.0
 * @date 2022/03/28
 * @since 1.8
 **/
@SpringBootTest
public class ElasticSearchTest {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Creates the index {@code lrc_blog5} and prints the response.
     */
    @SneakyThrows
    @Test
    public void test1() {
        CreateIndexRequest createIndexRequest = new CreateIndexRequest("lrc_blog5");

        CreateIndexResponse createIndexResponse =
                restHighLevelClient.indices().create(createIndexRequest, RequestOptions.DEFAULT);

        Console.log(createIndexResponse);
    }

    /**
     * Checks, for each of a fixed list of index names, whether the index exists.
     */
    @SneakyThrows
    @Test
    public void test2() {
        // A plain loop (instead of a lambda with an inner try/catch) lets an
        // IOException propagate through @SneakyThrows, so a failed request
        // fails the test instead of only printing a stack trace.
        for (String indexName : new String[] {"lrc_blog4", "test_blog"}) {
            GetIndexRequest getIndexRequest = new GetIndexRequest(indexName);
            boolean exists = restHighLevelClient.indices().exists(getIndexRequest, RequestOptions.DEFAULT);
            Console.log("索引【{}】是否存在:{}", indexName, exists);
        }
    }

    /**
     * Deletes the index {@code lrc_blog4} and prints whether the request was acknowledged.
     */
    @SneakyThrows
    @Test
    public void test3() {
        DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest("lrc_blog4");

        AcknowledgedResponse acknowledgedResponse =
                restHighLevelClient.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);
        Console.log(acknowledgedResponse.isAcknowledged());
    }
}
创建、查询、删除、更新文档操作
package top.linruchang.springbooottest.controller;

import cn.hutool.core.lang.Console;
import cn.hutool.core.lang.UUID;
import cn.hutool.core.util.ArrayUtil;
import com.alibaba.fastjson.JSON;
import lombok.SneakyThrows;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;

import java.util.HashMap;
import java.util.Map;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
 * Document-level examples for the Elasticsearch high-level REST client:
 * index (single and bulk), get, update, delete and search documents in
 * the index named by {@link #indexName}.
 *
 * @author LinRuChang
 * @version 1.0
 * @date 2022/03/28
 * @since 1.8
 **/
@SpringBootTest
public class ElasticSearchCRUDTest {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    private final String indexName = "lrc_blog4";

    /**
     * Runs before every {@code @Test}: creates the index if it does not exist yet.
     */
    @SneakyThrows
    @BeforeEach
    public void createIndex() {
        GetIndexRequest getIndexRequest = new GetIndexRequest(indexName);
        boolean exists = restHighLevelClient.indices().exists(getIndexRequest, RequestOptions.DEFAULT);
        if (exists) {
            Console.log("索引【{}】:已存在", indexName);
            return;
        }

        CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName);
        CreateIndexResponse createIndexResponse =
                restHighLevelClient.indices().create(createIndexRequest, RequestOptions.DEFAULT);
        Console.log(createIndexResponse);
        Console.log("创建索引【{}】:{}", indexName, createIndexResponse.isAcknowledged());
    }

    /**
     * Builds the sample document shared by the single-index and bulk-index tests.
     *
     * @return a new, mutable map holding the sample field values
     */
    private Map<String, Object> sampleDocument() {
        Map<String, Object> sourceData = new HashMap<>();
        sourceData.put("title", "科技兴国2");
        sourceData.put("date", "2020-11-05");
        sourceData.put("content", "f发送到发送到发生的范德萨");
        sourceData.put("age", "6");
        sourceData.put("tags", new String[] {"男", "章法"});
        return sourceData;
    }

    /**
     * Indexes a single document.
     */
    @SneakyThrows
    @Test
    public void test1() {
        IndexRequest indexRequest = new IndexRequest(indexName);
        indexRequest.timeout(TimeValue.timeValueSeconds(10));

        // Set an explicit document id here if needed; when none is set,
        // Elasticsearch generates one.
        //indexRequest.id("1");

        // The document body is sent as a JSON string.
        indexRequest.source(JSON.toJSONString(sampleDocument()), XContentType.JSON);

        IndexResponse indexResponse = restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);

        Console.log(indexResponse);
        Console.log(indexResponse.getResult());
        Console.log(indexResponse.status());
    }

    /**
     * Indexes ten copies of the sample document in one bulk request.
     */
    @SneakyThrows
    @Test
    public void test11() {
        BulkRequest bulkRequest = new BulkRequest();
        Map<String, Object> sourceData = sampleDocument();

        for (int num = 0; num < 10; num++) {
            // Id = simple (dash-less) UUID followed by the loop counter.
            IndexRequest indexRequest = new IndexRequest(indexName)
                    .id(UUID.fastUUID().toString(true) + num)
                    .source(sourceData);
            bulkRequest.add(indexRequest);
        }

        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        Console.log(bulkResponse);
        Console.log(bulkResponse.status());
        // The overall status does not reflect per-item failures, so report them explicitly.
        if (bulkResponse.hasFailures()) {
            Console.log("批量创建文档存在失败项:{}", bulkResponse.buildFailureMessage());
        }
    }

    /**
     * Fetches a document by id and prints its source, if the document exists.
     */
    @SneakyThrows
    @Test
    public void test2() {
        String documentId = "gP-m0X8B8evNf5bY1MUH";
        GetRequest getRequest = new GetRequest(indexName, documentId);

        if (!restHighLevelClient.exists(getRequest, RequestOptions.DEFAULT)) {
            Console.log("索引【{}】的文档【{}】不存在", indexName, documentId);
            return;
        }

        GetResponse documentFields = restHighLevelClient.get(getRequest, RequestOptions.DEFAULT);
        Map<String, Object> sourceAsMap = documentFields.getSourceAsMap();
        Console.log(sourceAsMap);
    }

    /**
     * Partially updates a document by id, then re-reads and prints it.
     */
    @Test
    @SneakyThrows
    public void test3() {
        String documentId = "gP-m0X8B8evNf5bY1MUH";
        GetRequest getRequest = new GetRequest(indexName, documentId);

        if (!restHighLevelClient.exists(getRequest, RequestOptions.DEFAULT)) {
            Console.log("索引【{}】的文档【{}】不存在", indexName, documentId);
            return;
        }

        UpdateRequest updateRequest = new UpdateRequest(indexName, documentId);
        updateRequest.timeout(TimeValue.timeValueSeconds(10));

        // A partial document: only the fields passed here are changed,
        // the rest of the stored document is left as is.
        Map<String, Object> sourceData = new HashMap<>();
        sourceData.put("sport", "篮球");
        updateRequest.doc(sourceData);

        UpdateResponse updateResponse = restHighLevelClient.update(updateRequest, RequestOptions.DEFAULT);
        Console.log(updateResponse);
        // NOTE(review): presumably null unless source fetching is enabled on the request; verify.
        Console.log(updateResponse.getGetResult());
        Console.log(updateResponse.status());

        // Read the document back to show its latest state.
        GetResponse getResponse = restHighLevelClient.get(getRequest, RequestOptions.DEFAULT);
        Map<String, Object> sourceAsMap = getResponse.getSourceAsMap();
        Console.log("更新后的文档记录:{}", sourceAsMap);
    }

    /**
     * Deletes a document by id.
     */
    @Test
    @SneakyThrows
    public void test4() {
        String documentId = "gf-10X8B8evNf5bYDsUv";
        DeleteRequest deleteRequest = new DeleteRequest(indexName, documentId);

        DeleteResponse deleteResponse = restHighLevelClient.delete(deleteRequest, RequestOptions.DEFAULT);
        Console.log(deleteResponse);
        Console.log(deleteResponse.status());
    }

    /**
     * Runs a match query on {@code title} and prints the first page of hits.
     */
    @Test
    @SneakyThrows
    public void test5() {
        MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title", "科技");

        // Query definition: 10 s timeout, first page, at most 3 hits.
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.timeout(TimeValue.timeValueSeconds(10));
        searchSourceBuilder.query(matchQueryBuilder);
        searchSourceBuilder.from(0);
        searchSourceBuilder.size(3);

        SearchRequest searchRequest = new SearchRequest(indexName);
        searchRequest.source(searchSourceBuilder);

        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        Console.log(searchResponse);
        Console.log(searchResponse.status());

        SearchHits hits = searchResponse.getHits();
        SearchHit[] pageHits = hits.getHits();
        // This is the number of hits in the returned page (bounded by size above).
        Console.log("查询结果总记录数:{}", pageHits.length);
        for (SearchHit hit : pageHits) {
            Console.log("文档记录:{}", hit.getSourceAsMap());
        }
    }

}
案例 - 京东抓取数据、前端展示

项目结构

EsController.java

package top.linruchang.springbooottest.controller;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Console;
import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSON;
import lombok.SneakyThrows;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import top.linruchang.springbooottest.bean.db.Product;
import top.linruchang.springbooottest.utils.JdUtil;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Web endpoints for the JD product demo: scrape product listings into
 * Elasticsearch and search them with highlighted product names.
 *
 * @author LinRuChang
 * @version 1.0
 * @date 2022/03/29
 * @since 1.8
 **/
@Controller
@RequestMapping("es")
public class EsController {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    private final String indexName = "lrc_info_product_jd";

    /**
     * Serves the search page.
     *
     * @return the view name {@code index}
     */
    @GetMapping("/index")
    public String indexHtml() {
        return "index";
    }

    /**
     * Scrapes the JD search results for {@code productName} and bulk-indexes
     * them into {@link #indexName}.
     *
     * <p>A blank keyword or an empty scrape result is a no-op. I/O failures
     * propagate to the caller instead of being swallowed.
     *
     * @param productName search keyword; may be blank
     * @return always {@code true} when the method completes normally
     */
    @SneakyThrows
    @GetMapping("/pullJdInfo")
    @ResponseBody
    public Object pullJdInfo(String productName) {
        if (StrUtil.isBlank(productName)) {
            return true;
        }

        List<Product> products = JdUtil.getProductInfo(productName);
        if (CollUtil.isEmpty(products)) {
            // A bulk request without any action fails validation, so skip it.
            Console.log("关键字【{}】:未抓取到任何商品", productName);
            return true;
        }

        BulkRequest bulkRequest = new BulkRequest();
        // Return only once the new documents are visible to search, so a
        // follow-up query can see them without an arbitrary sleep.
        bulkRequest.setRefreshPolicy("wait_for");
        for (Product product : products) {
            // No explicit id: Elasticsearch generates one per document.
            bulkRequest.add(new IndexRequest(indexName)
                    .source(JSON.toJSONString(product), XContentType.JSON));
        }

        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        Console.log("关键字【{}】:{}", productName, bulkResponse.status());
        // The overall status does not reflect per-item failures, so report them explicitly.
        if (bulkResponse.hasFailures()) {
            Console.log("关键字【{}】批量写入存在失败项:{}", productName, bulkResponse.buildFailureMessage());
        }

        return true;
    }

    /**
     * Searches indexed products by name and returns them with a highlighted name.
     *
     * <p>When nothing matches and {@code type} is {@code "1"}, the keyword is
     * scraped from JD via {@link #pullJdInfo(String)} and the search is run once more.
     *
     * @param productName keyword matched against the {@code name} field; when blank, no query is set
     * @param type        {@code "1"} enables scrape-on-miss; any other value disables it
     * @return a map with {@code size} (number of rows) and {@code result} (the rows)
     */
    @SneakyThrows
    @GetMapping("/getJdInfo")
    @ResponseBody
    public Object getJdInfo(String productName, String type) {
        final String highlightField = "name";

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        if (StrUtil.isNotBlank(productName)) {
            searchSourceBuilder.query(QueryBuilders.matchQuery("name", productName));
        }
        searchSourceBuilder.from(0);
        searchSourceBuilder.size(10000);
        searchSourceBuilder.highlighter(new HighlightBuilder()
                .field(highlightField)
                .preTags("<span style='color:red'>")
                .postTags("</span>")
                .requireFieldMatch(true));

        // NOTE(review): presumably this search fails if the index has never been created; verify.
        SearchRequest searchRequest = new SearchRequest(indexName);
        searchRequest.source(searchSourceBuilder);

        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        List<Map<String, Object>> result = Arrays.stream(search.getHits().getHits())
                .map(searchHit -> toResultRow(searchHit, highlightField))
                .collect(Collectors.toList());

        if (result.isEmpty() && StrUtil.equals(type, "1")) {
            pullJdInfo(productName);
            // type = null: retry the search exactly once, without scraping again.
            return getJdInfo(productName, null);
        }

        Map<String, Object> resultMap = new HashMap<>();
        resultMap.put("size", result.size());
        resultMap.put("result", result);
        return resultMap;
    }

    /**
     * Converts a search hit to its source map, adding {@code highlightName}
     * (all highlight fragments concatenated) when the hit carries highlights.
     * The source itself does not contain the highlight markup.
     */
    private static Map<String, Object> toResultRow(SearchHit searchHit, String highlightField) {
        Map<String, Object> sourceAsMap = searchHit.getSourceAsMap();

        HighlightField highlighted = searchHit.getHighlightFields().get(highlightField);
        if (highlighted != null && highlighted.getFragments() != null) {
            // Use the public accessor rather than reflecting on Text's private field.
            String highlightNameText = Arrays.stream(highlighted.getFragments())
                    .map(Text::string)
                    .collect(Collectors.joining());
            sourceAsMap.put("highlightName", highlightNameText);
        }

        return sourceAsMap;
    }

}

JdUtil.java

package top.linruchang.springbooottest.utils;

import cn.hutool.core.lang.Console;
import cn.hutool.core.util.StrUtil;
import lombok.SneakyThrows;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import top.linruchang.springbooottest.bean.db.Product;

import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Scrapes product listings from the JD search result page.
 *
 * @author LinRuChang
 * @version 1.0
 * @date 2022/03/29
 * @since 1.8
 **/
public class JdUtil {

    /** Search URL template; {@code {}} is replaced with the URL-encoded keyword. */
    private static final String SEARCH_URL =
            "https://search.jd.com/Search?keyword={}&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=52652c1508ae479cb6765f1a871ed23c";

    /**
     * Searches JD for the given keyword and parses the result page into products.
     *
     * @param productName search keyword; a blank value yields an empty list
     * @return the parsed products; empty when the keyword is blank or the page
     *         has no {@code #J_goodsList} element
     */
    @SneakyThrows
    public static List<Product> getProductInfo(String productName) {
        if (StrUtil.isBlank(productName)) {
            return Collections.emptyList();
        }

        // Encode the keyword so non-ASCII text and reserved characters
        // (&, #, spaces) cannot corrupt the query string.
        String keyword = java.net.URLEncoder.encode(productName, "UTF-8");
        String url = StrUtil.format(SEARCH_URL, keyword);

        Document doc = Jsoup.parse(new URL(url), 5000);

        List<Product> products = new ArrayList<>();
        Element goodsList = doc.getElementById("J_goodsList");
        if (goodsList == null) {
            // The page did not contain a result list; nothing to parse.
            return products;
        }

        for (Element liElement : goodsList.select(".gl-item")) {
            products.add(toProduct(liElement));
        }
        return products;
    }

    /**
     * Maps one {@code .gl-item} element to a {@link Product}. Missing
     * sub-elements produce empty strings instead of exceptions.
     */
    private static Product toProduct(Element liElement) {
        String imgUrl = liElement.getElementsByTag("img").eq(0).attr("data-lazy-img");
        if (StrUtil.isNotBlank(imgUrl)) {
            // Add the scheme when the attribute value does not already start with it.
            imgUrl = StrUtil.addPrefixIfNot(imgUrl, "http:");
        }

        // eq(0) yields an empty selection when there is no price element,
        // so the lookups below return "" instead of throwing.
        Elements priceBox = liElement.getElementsByClass("p-price").eq(0);

        return Product.builder()
                .name(liElement.getElementsByClass("p-name").eq(0).text())
                .price(priceBox.select("strong").text())
                .VipPrice(priceBox.select("span[title='PLUS会员专享价']").text())
                .imgUrl(imgUrl)
                .tags(liElement.select(".p-icons i").eachText())
                .shopName(liElement.getElementsByClass("p-shop").text())
                .build();
    }

    /**
     * Manual smoke run: scrapes one keyword and prints every product.
     */
    public static void main(String[] args) {
        List<Product> products = getProductInfo("神舟");
        products.forEach(Console::log);
    }

}

Product.java

package top.linruchang.springbooottest.bean.db;

import lombok.Builder;
import lombok.Data;
import lombok.experimental.Accessors;

import java.util.List;

/**
 * A product scraped from a JD search result page. Accessors and the builder
 * are generated by Lombok; all values are kept as text.
 *
 * @author LinRuChang
 * @version 1.0
 * @date 2022/03/29
 * @since 1.8
 **/
@Data
@Builder
@Accessors(chain = true)
public class Product {

    /**
     * Product name.
     */
    String name;

    /**
     * Regular price.
     */
    String price;

    /**
     * Member (PLUS) price.
     * NOTE(review): the field name starts with an upper-case letter; the
     * serialized JSON key depends on the JSON library's naming rules. Verify
     * it matches what the front end reads before renaming anything.
     */
    String VipPrice;

    /**
     * Product image URL.
     */
    String imgUrl;

    /**
     * Shop name.
     */
    String shopName;

    /**
     * Product tags.
     */
    List<String> tags;
}

index.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    <script src="/jquery-3.5.1.min.js"></script>
    <script src="/vue.global.js"></script>
    <style>
        html, body {
            width: 100%;
            height: 100%;
        }

        .search-box {
            /*text-align: center;*/
            margin-bottom: 50px;
            background: antiquewhite;
            height: 20%;
            display: flex;
            align-items: center; /* vertically center the children of .search-box */
            justify-content: center; /* horizontally center the children of .search-box */
        }
    </style>
</head>
<body>

<!-- Root element of the Vue app (mounted via the "#box" selector). -->
<div id="box" style="height: 100%">

    <!-- Search bar: pressing Enter in the input or clicking the button calls searchProductName. -->
    <div class="search-box" >
        <input type="text" name="productName" v-model="productName" @keyup.enter="searchProductName">
        <button @click="searchProductName" >搜索产品</button>
    </div>

    <!-- One card per entry of the products array. -->
    <div v-for="product,index in products" :key="index" style="border-bottom: 5px solid red;margin-bottom: 10px">
        <img :src="product.imgUrl">
        <p>{{product.name}}</p>
        <!-- v-html inserts highlightName as raw HTML so the highlight span tags render. -->
        <!-- NOTE(review): the value is not escaped here; confirm the data is trusted or sanitised. -->
        <p v-html="product.highlightName"></p>
        <p>{{product.price}}</p>
        <p>{{product.VipPrice}}</p>
        <p>{{product.shopName}}</p>
        <p>
            <span v-for="tag,tagIndex in product.tags"
                  style="display: inline-block;background: aqua;margin-right: 10px">{{tag}}</span>
        </p>
    </div>
</div>
</body>

<script>

    /**
     * Reads one parameter from the page's query string.
     *
     * The value is URL-decoded, so a keyword such as "%E7%A5%9E%E8%88%9F"
     * comes back as readable text, and values containing "=" are not truncated.
     *
     * @param {string} variable parameter name
     * @returns {string|boolean} the decoded value, or false when the parameter is absent
     */
    function getQueryVariable(variable) {
        var params = new URLSearchParams(window.location.search);
        return params.has(variable) ? params.get(variable) : false;
    }

    // Search endpoint of the backend.
    var SEARCH_URL = "http://127.0.0.1:5000/es/getJdInfo";

    var app = Vue.createApp({
        data: function () {
            return {
                names: ['a', 'b'],
                products: [],
                productName: ""
            }
        },
        methods: {
            /**
             * Requests products for the current productName and stores them
             * in this.products.
             *
             * @param {boolean} pullOnMiss when true, sends type=1 so the
             *        backend scrapes the keyword if nothing is indexed yet
             */
            loadProducts: function (pullOnMiss) {
                // Encode the keyword so characters like & or # cannot break the query string.
                var query = "productName=" + encodeURIComponent(this.productName);
                if (pullOnMiss) {
                    query = "type=1&" + query;
                }
                $.get(SEARCH_URL + "?" + query, (data) => {
                    this.products = data.result || [];
                }, "json")
            },
            // Triggered by the search button and by Enter in the input.
            searchProductName: function () {
                this.loadProducts(true);
            }
        },
        mounted: function () {
            // When the page URL carries ?productName=..., pre-fill the input
            // and run the search once (without scrape-on-miss).
            let urlProductName = getQueryVariable("productName")
            if (urlProductName) {
                this.productName = urlProductName;
                this.loadProducts(false);
            }
        }
    });
    app.mount("#box");

</script>

</html>

相关文章