2025-06-07 00:38:09 德国世界杯预选赛

ElasticSearch总结

ElasticSearch学习

ElasticSearch用于数据检索,效率非常高,尤其是在大数据环境下,所以非常有必要学习!

1. 安装

这里我使用阿里云服务器,并且采用Docker 安装ES

安装elasticsearch

# 1.拉取镜像

docker pull elasticsearch:7.7.1

# 2.生成容器

docker run -d -p 9300:9300 -p 9200:9200 --name es -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -e "discovery.type=single-node" -v /root/es/plugins:/usr/share/elasticsearch/plugins -v /root/es/data:/usr/share/elasticsearch/data elasticsearch:7.7.1

安装kibana

# 1.下载kibana镜像到本地

docker pull kibana:7.7.1

# 2.启动kibana容器

docker run -d --name kibana -e ELASTICSEARCH_HOSTS=http://47.101.52.63:9200 -p 5601:5601 kibana:7.7.1

安装elasticsearch-head

# 1.下载镜像

docker pull mobz/elasticsearch-head:5

# 2.生成容器

docker run -d -p 9100:9100 --name es-head docker.io/mobz/elasticsearch-head:5

# 3.在这里可能会出现跨域拒绝访问问题

进入elasticsearch容器内部,修改配置文件elasticsearch.yml

docker ps -a #拿到运行容器elasticsearch 的 id

docker exec -it ******(容器id) /bin/bash

cd ./config

vi elasticsearch.yml

在elasticsearch.yml中添加:

http.cors.enabled: true

http.cors.allow-origin: "*"

然后重启容器

docker restart es

​ 安装IK分词器

# 1.下载对应版本的IK分词器

wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.7.1/elasticsearch-analysis-ik-7.7.1.zip

# 2.解压到plugins/elasticsearch文件夹中

yum install -y unzip #下载unzip

unzip -d plugins/elasticsearch elasticsearch-analysis-ik-7.7.1.zip

# 3.添加自定义扩展词和停用词

cd plugins/elasticsearch/config

vim IKAnalyzer.cfg.xml

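<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer 扩展配置</comment>
    <!-- 扩展词典 -->
    <entry key="ext_dict">ext_dict.dic</entry>
    <!-- 扩展停用词词典 -->
    <entry key="ext_stopwords">ext_stopwords.dic</entry>
</properties>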

# 4.在ik分词器目录下config目录中创建ext_dict.dic文件 编码一定要为UTF-8才能生效

vim ext_dict.dic 加入扩展词即可

# 5. 在ik分词器目录下config目录中创建ext_stopwords.dic文件

vim ext_stopwords.dic 加入停用词即可

# 6.将此容器提交成为一个新的镜像

docker commit -a="zk" -m="with IKAnalyzer" b35d35f72b8d zk/elasticsearch:7.7.1

# 7.使用新生成的这个es镜像创建容器,并挂载数据卷(如果之前名为es的容器还在,需先执行 docker rm -f es)

docker run -d --name es -p 9200:9200 -p 9300:9300 -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -e "discovery.type=single-node" -v /usr/local/IKAnalyzer:/usr/share/elasticsearch/plugins/elasticsearch/config zk/elasticsearch:7.7.1

2.项目实战(基于es的仿京东搜索)

爬虫

导入jsoup依赖

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>

编写测试,生成工具类

package com.ittao.utils;

import com.ittao.entity.Content;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

import java.io.IOException;

import java.net.URL;

import java.util.ArrayList;

import java.util.List;

/**
 * Scrapes product listings from the JD.com search result page.
 *
 * <p>Example: {@code HtmlParseUtil.parseJd("java").forEach(System.out::println);}
 */
public class HtmlParseUtil {

    /** Timeout, in milliseconds, handed to jsoup when fetching the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for {@code keyword} and extracts one {@link Content}
     * (title, image URL, price text) per {@code <li>} found under the element with id
     * {@code J_goodsList}.
     *
     * @param keyword search term; must not be {@code null}
     * @return a mutable list of the scraped items; empty when the page contains no
     *         {@code J_goodsList} element
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        if (keyword == null) {
            throw new IllegalArgumentException("keyword must not be null");
        }

        // Percent-encode the keyword so that non-ASCII text, spaces, '&', '#', etc. cannot
        // corrupt the query string. The fully qualified name is used so that this block
        // does not depend on an extra import.
        String encodedKeyword = java.net.URLEncoder.encode(keyword, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword + "&enc=utf-8";

        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Content> contents = new ArrayList<>();

        // getElementById returns null when the id is absent (for example if the site
        // serves a different page than the expected result list); treat that as "no results".
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            return contents;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            // NOTE(review): the site may lazy-load images through an attribute other
            // than "src" - confirm against the live markup.
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contents.add(content);
        }
        return contents;
    }
}

前后端分离实现 后端实现

整体结构

pom.xml

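<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.0.RELEASE</version>
        <relativePath/>
    </parent>

    <groupId>com.ittao</groupId>
    <artifactId>elasticsearch_study</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>elasticsearch_study</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <elasticsearch.version>7.7.1</elasticsearch.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.61</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>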

config

package com.ittao.config;

import org.apache.http.HttpHost;

import org.elasticsearch.client.RestClient;

import org.elasticsearch.client.RestHighLevelClient;

import org.springframework.context.annotation.Bean;

import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that registers the Elasticsearch client in the application context.
 *
 * <p>Created by Huang Xiaotao (黄小涛), 2020-06-07.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Creates the {@link RestHighLevelClient} bean, pointed at the single node
     * {@code 47.101.52.63:9200} over plain HTTP.
     *
     * @return the client used by the rest of the application
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost esNode = new HttpHost("47.101.52.63", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}

entity

package com.ittao.entity;

import lombok.AllArgsConstructor;

import lombok.Data;

import lombok.NoArgsConstructor;

/**
 * One scraped product entry: title, image URL and price text.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok.
 *
 * <p>Created by Huang Xiaotao (黄小涛), 2020-06-08.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Product title. */
    private String title;

    /** Image URL of the product. */
    private String img;

    /** Price, kept as the text that was scraped. */
    private String price;
}

package com.ittao.entity;

import lombok.AllArgsConstructor;

import lombok.Data;

import lombok.NoArgsConstructor;

import lombok.experimental.Accessors;

import org.springframework.stereotype.Component;

/**
 * Simple name/age holder that is also registered as a Spring component.
 *
 * <p>Lombok generates the accessors and constructors; because of
 * {@code @Accessors(chain = true)} the setters return {@code this} so calls can be chained.
 *
 * <p>Created by Huang Xiaotao (黄小涛), 2020-06-08.
 */
@Component
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
@Data
public class User {

    /** Name of the user. */
    private String name;

    /** Age of the user. */
    private int age;
}

service

package com.ittao.service.impl;

import com.alibaba.fastjson.JSON;

import com.ittao.entity.Content;

import com.ittao.service.ContentService;

import com.ittao.utils.HtmlParseUtil;

import org.elasticsearch.action.bulk.BulkRequest;

import org.elasticsearch.action.bulk.BulkResponse;

import org.elasticsearch.action.index.IndexRequest;

import org.elasticsearch.action.search.SearchRequest;

import org.elasticsearch.action.search.SearchResponse;

import org.elasticsearch.client.RequestOptions;

import org.elasticsearch.client.RestHighLevelClient;

import org.elasticsearch.common.text.Text;

import org.elasticsearch.common.unit.TimeValue;

import org.elasticsearch.common.xcontent.XContentType;

import org.elasticsearch.index.query.FuzzyQueryBuilder;

import org.elasticsearch.index.query.MatchQueryBuilder;

import org.elasticsearch.index.query.QueryBuilders;

import org.elasticsearch.index.query.TermQueryBuilder;

import org.elasticsearch.search.SearchHit;

import org.elasticsearch.search.builder.SearchSourceBuilder;

import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;

import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.beans.factory.annotation.Qualifier;

import org.springframework.stereotype.Service;

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import java.util.Map;

import java.util.concurrent.TimeUnit;

@Service

public class ContentServiceImpl implements ContentService {

@Autowired

@Qualifier("restHighLevelClient")

private RestHighLevelClient client;

/**

* 将爬取的数据添加到es中

* @param keyword

* @return

* @throws IOException

*/

@Override

public boolean addToEs(String keyword) throws IOException {

//1.获取要添加的数据

List contentList = HtmlParseUtil.parseJd(keyword);

//2.创建批量添加请求

BulkRequest request = new BulkRequest();

//3.批量添加数据

for (Content content : contentList) {

request.add(new IndexRequest("jd_goods"). //添加到jd_goods这个索引中

source(JSON.toJSONString(content), XContentType.JSON));

}

request.timeout(new TimeValue(2, TimeUnit.MINUTES));

//4.执行批量添加请求

BulkResponse response = client.bulk(request, RequestOptions.DEFAULT);

//5.获取响应

return !response.hasFailures();

}

@Override

public List> searchPage(String keyword, int pageNo, int pageSize) throws IOException {

if (pageNo<=0){

pageNo=1;

}

//根据关键字进行搜索

//1.创建搜索请求

SearchRequest request = new SearchRequest("jd_goods");

//2.添加搜索条件

SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

//添加分页

sourceBuilder.from(pageNo);

sourceBuilder.size(pageSize);

//添加高亮

HighlightBuilder highlightBuilder = new HighlightBuilder();

//设置高亮字段

highlightBuilder.field("title");

//是否显示多个高亮

highlightBuilder.requireFieldMatch(true);

//设置高亮前缀

highlightBuilder.preTags("");

//设置高亮后缀

highlightBuilder.postTags("");

sourceBuilder.highlighter(highlightBuilder);

//根据关键字搜索title包含的

MatchQueryBuilder termQuery = QueryBuilders.matchQuery("title", keyword);

sourceBuilder.query(termQuery);

sourceBuilder.timeout(new TimeValue(1, TimeUnit.MINUTES));

request.source(sourceBuilder);

//3.执行搜索

SearchResponse response = client.search(request, RequestOptions.DEFAULT);

//4.将响应数据进行封装

List> mapList = new ArrayList<>();

for (SearchHit documentFields : response.getHits().getHits()) {

//目标:将高亮字段替换我们原先的字段

Map sourceAsMap = documentFields.getSourceAsMap(); //原先的字段

//1.获取高亮的全部字段

Map highlightFields = documentFields.getHighlightFields();

//2.获取我们设置的title高亮字段

HighlightField title = highlightFields.get("title");

//3.解析高亮的字段

if (title!=null){

//获取高亮片段

Text[] fragments = title.getFragments();

String n_title="";

for (Text fragment : fragments) {

n_title +=fragment;

}

//4.替换

sourceAsMap.put("title", n_title);

}

mapList.add(sourceAsMap);

}

return mapList;

}

}

controller

package com.ittao.Controller;

import com.ittao.service.ContentService;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.web.bind.annotation.CrossOrigin;

import org.springframework.web.bind.annotation.GetMapping;

import org.springframework.web.bind.annotation.PathVariable;

import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;

import java.util.List;

import java.util.Map;

@RestController

@CrossOrigin

public class ContentController {

@Autowired

private ContentService contentService;

@GetMapping("/addToEs/{keyword}")

public boolean addToEs(@PathVariable("keyword") String keyword) throws IOException {

return contentService.addToEs(keyword);

}

@GetMapping("/searchPage/{keyword}/{pageNo}/{pageSize}")

public List> searchPage(@PathVariable("keyword") String keyword,

@PathVariable("pageNo") int pageNo,

@PathVariable("pageSize") int pageSize) throws IOException {

return contentService.searchPage(keyword, pageNo, pageSize);

}

}

utils

package com.ittao.utils;

import com.ittao.entity.Content;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

import java.io.IOException;

import java.net.URL;

import java.util.ArrayList;

import java.util.List;

/**
 * Scrapes product listings from the JD.com search result page.
 *
 * <p>Example: {@code HtmlParseUtil.parseJd("java").forEach(System.out::println);}
 */
public class HtmlParseUtil {

    /** Timeout, in milliseconds, handed to jsoup when fetching the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for {@code keyword} and extracts one {@link Content}
     * (title, image URL, price text) per {@code <li>} found under the element with id
     * {@code J_goodsList}.
     *
     * @param keyword search term; must not be {@code null}
     * @return a mutable list of the scraped items; empty when the page contains no
     *         {@code J_goodsList} element
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJd(String keyword) throws IOException {
        if (keyword == null) {
            throw new IllegalArgumentException("keyword must not be null");
        }

        // Percent-encode the keyword so that non-ASCII text, spaces, '&', '#', etc. cannot
        // corrupt the query string. The fully qualified name is used so that this block
        // does not depend on an extra import.
        String encodedKeyword = java.net.URLEncoder.encode(keyword, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyword + "&enc=utf-8";

        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Content> contents = new ArrayList<>();

        // getElementById returns null when the id is absent (for example if the site
        // serves a different page than the expected result list); treat that as "no results".
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            return contents;
        }

        for (Element item : goodsList.getElementsByTag("li")) {
            // NOTE(review): the site may lazy-load images through an attribute other
            // than "src" - confirm against the live markup.
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            contents.add(content);
        }
        return contents;
    }
}

前端实现

首页

查询页面

!

生成数据页面

route中index.js

import Vue from 'vue'

import VueRouter from 'vue-router'

import Search from '../views/Search.vue'

import Home from '../views/Home.vue'

import GenerateData from '../views/GenerateData.vue'

// Register the router plugin with Vue before the router instance is created.
Vue.use(VueRouter)

// Route table: home page, search page and the data-generation page.
const routes = [
  { path: '/', name: 'Home', component: Home },
  { path: '/search', name: 'Search', component: Search },
  { path: '/generateData', name: 'GenerateData', component: GenerateData }
]

// History-mode router rooted at the build's BASE_URL.
const router = new VueRouter({
  routes,
  base: process.env.BASE_URL,
  mode: 'history'
})

export default router

main.js

import Vue from 'vue'

import App from './App.vue'

import router from './router'

import store from './store'

import ElementUI from 'element-ui'

import 'element-ui/lib/theme-chalk/index.css'

import axios from 'axios'

Vue.config.productionTip = false

Vue.use(ElementUI)

// `baseURL` is prepended to every request `url` unless that `url` is absolute.
axios.defaults.baseURL = 'http://localhost:8989'

// Expose the shared axios instance to every component as `this.$http`.
Vue.prototype.$http = axios

// Create the root instance and mount it on the #app element.
const app = new Vue({
  router,
  store,
  render: (createElement) => createElement(App)
})

app.$mount('#app')

3.总结

通过学习,对es的基本使用算是初步的掌握了.学习es的步骤如下

1.es的安装,尤其通过docker安装

2.es的简单restful api的使用,包括简单查询和复杂查询,通过kibana可视化界面操作

3.es的java客户端工具api学习,通过java语句去实现增删改查,其实本质上和查询语句类似,该有的方法都有

4.最后通过es仿京东搜索的实战练习,具备了对es的基本运用能力

《人民的名义》蔡成功结局是什么死了吗?蔡成功和陈海事故有关系吗?
华为流量校准老是失败怎么解决 华为手机流量校准失败
top