ElasticSearch总结
ElasticSearch学习
ElasticSearch用于数据检索,效率非常高,尤其是在大数据环境下优势明显,所以非常有必要学习!
1. 安装
这里我使用阿里云服务器,并且采用Docker 安装ES
安装elasticsearch
# 1.拉取镜像
docker pull elasticsearch:7.7.1
# 2.生成容器
docker run -d -p 9300:9300 -p 9200:9200 --name es -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -e "discovery.type=single-node" -v /root/es/plugins:/usr/share/elasticsearch/plugins -v /root/es/data:/usr/share/elasticsearch/data elasticsearch:7.7.1
安装kibana
# 1.下载kibana镜像到本地
docker pull kibana:7.7.1
# 2.启动kibana容器
docker run -d --name kibana -e ELASTICSEARCH_HOSTS=http://47.101.52.63:9200 -p 5601:5601 kibana:7.7.1
安装elasticsearch-head
# 1.下载镜像
docker pull mobz/elasticsearch-head:5
# 2.生成容器
docker run -d -p 9100:9100 --name es-head docker.io/mobz/elasticsearch-head:5
# 3.在这里可能会出现跨域拒绝访问问题
进入elasticsearch容器内部,修改配置文件elasticsearch.yml
docker ps -a #拿到运行容器elasticsearch 的 id
docker exec -it ******(容器id) /bin/bash
cd ./config
vi elasticsearch.yml
在elasticsearch.yml中添加:
http.cors.enabled: true
http.cors.allow-origin: "*"
然后重启容器
docker restart es
安装IK分词器
# 1.下载对应版本的IK分词器
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.7.1/elasticsearch-analysis-ik-7.7.1.zip
# 2.解压到plugins/elasticsearch文件夹中
yum install -y unzip #下载unzip
unzip -d plugins/elasticsearch elasticsearch-analysis-ik-7.7.1.zip
# 3.添加自定义扩展词和停用词
cd plugins/elasticsearch/config
vim IKAnalyzer.cfg.xml
# 4.在ik分词器目录下config目录中创建ext_dict.dic文件 编码一定要为UTF-8才能生效
vim ext_dict.dic 加入扩展词即可
# 5. 在ik分词器目录下config目录中创建ext_stopwords.dic文件
vim ext_stopwords.dic 加入停用词即可
# 6.将此容器提交成为一个新的镜像
docker commit -a="zk" -m="with IKAnalyzer" b35d35f72b8d zk/elasticsearch:7.7.1
# 7.使用新生成的这个es镜像创建容器,并挂载数据卷
docker run -d --name es -p 9200:9200 -p 9300:9300 -e ES_JAVA_OPTS="-Xms128m -Xmx128m" -v /usr/local/IKAnalyzer:/usr/share/elasticsearch/plugins/elasticsearch/config zk/elasticsearch:7.7.1
2.项目实战(基于es的仿京东搜索)
爬虫
导入jsoup依赖
编写测试,生成工具类
package com.ittao.utils;
import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
public class HtmlParseUtil {
// public static void main(String[] args) throws IOException {
// HtmlParseUtil.parseJd("黄涛").forEach(System.out::println);
//
// }
/**
* 解析京东页面 获取数据
* @param keyword
* @return
* @throws IOException
*/
public static List
//1.获取搜索url
String url = "https://search.jd.com/Search?keyword="+keyword+"&enc=utf-8";
//2.通过jsoup解析 获取文档对象
Document document = Jsoup.parse(new URL(url), 30000);
//3.接下来的操作和js一样了
Element j_goodsList = document.getElementById("J_goodsList");
Elements elements = j_goodsList.getElementsByTag("li");
ArrayList
for (Element element : elements) {
String img = element.getElementsByTag("img").eq(0).attr("src");
String price = element.getElementsByClass("p-price").text();
String title = element.getElementsByClass("p-name").eq(0).text();
Content content = new Content();
content.setTitle(title);
content.setImg(img);
content.setPrice(price);
contentArrayList.add(content);
}
return contentArrayList;
}
}
前后端分离实现 后端实现
整体结构
pom.xml
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
config
package com.ittao.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 *
 * <p>Created by 黄小涛, 2020-06-07.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Builds the client that talks to the Elasticsearch node at 47.101.52.63:9200 over HTTP.
     *
     * @return the client instance registered in the Spring container
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost esNode = new HttpHost("47.101.52.63", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}
entity
package com.ittao.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A scraped product: title, image address and price text.
 * Getters, setters, equals/hashCode/toString and both constructors are generated by Lombok.
 *
 * <p>Created by 黄小涛, 2020-06-08.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Product title. */
    private String title;

    /** Product image address. */
    private String img;

    /** Product price, kept as text. */
    private String price;
}
package com.ittao.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import org.springframework.stereotype.Component;
/**
 * Simple user bean with a name and an age, registered as a Spring component.
 * Lombok generates the accessors (setters return {@code this} for chaining),
 * equals/hashCode/toString and both constructors.
 *
 * <p>Created by 黄小涛, 2020-06-08.
 */
@Component
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
@Data
public class User {

    /** User name. */
    private String name;

    /** User age. */
    private int age;
}
service
package com.ittao.service.impl;
import com.alibaba.fastjson.JSON;
import com.ittao.entity.Content;
import com.ittao.service.ContentService;
import com.ittao.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.FuzzyQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@Service
public class ContentServiceImpl implements ContentService {
@Autowired
@Qualifier("restHighLevelClient")
private RestHighLevelClient client;
/**
* 将爬取的数据添加到es中
* @param keyword
* @return
* @throws IOException
*/
@Override
public boolean addToEs(String keyword) throws IOException {
//1.获取要添加的数据
List
//2.创建批量添加请求
BulkRequest request = new BulkRequest();
//3.批量添加数据
for (Content content : contentList) {
request.add(new IndexRequest("jd_goods"). //添加到jd_goods这个索引中
source(JSON.toJSONString(content), XContentType.JSON));
}
request.timeout(new TimeValue(2, TimeUnit.MINUTES));
//4.执行批量添加请求
BulkResponse response = client.bulk(request, RequestOptions.DEFAULT);
//5.获取响应
return !response.hasFailures();
}
@Override
public List
if (pageNo<=0){
pageNo=1;
}
//根据关键字进行搜索
//1.创建搜索请求
SearchRequest request = new SearchRequest("jd_goods");
//2.添加搜索条件
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
//添加分页
sourceBuilder.from(pageNo);
sourceBuilder.size(pageSize);
//添加高亮
HighlightBuilder highlightBuilder = new HighlightBuilder();
//设置高亮字段
highlightBuilder.field("title");
//是否显示多个高亮
highlightBuilder.requireFieldMatch(true);
//设置高亮前缀
highlightBuilder.preTags("");
//设置高亮后缀
highlightBuilder.postTags("");
sourceBuilder.highlighter(highlightBuilder);
//根据关键字搜索title包含的
MatchQueryBuilder termQuery = QueryBuilders.matchQuery("title", keyword);
sourceBuilder.query(termQuery);
sourceBuilder.timeout(new TimeValue(1, TimeUnit.MINUTES));
request.source(sourceBuilder);
//3.执行搜索
SearchResponse response = client.search(request, RequestOptions.DEFAULT);
//4.将响应数据进行封装
List
for (SearchHit documentFields : response.getHits().getHits()) {
//目标:将高亮字段替换我们原先的字段
Map
//1.获取高亮的全部字段
Map
//2.获取我们设置的title高亮字段
HighlightField title = highlightFields.get("title");
//3.解析高亮的字段
if (title!=null){
//获取高亮片段
Text[] fragments = title.getFragments();
String n_title="";
for (Text fragment : fragments) {
n_title +=fragment;
}
//4.替换
sourceAsMap.put("title", n_title);
}
mapList.add(sourceAsMap);
}
return mapList;
}
}
controller
package com.ittao.Controller;
import com.ittao.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.List;
import java.util.Map;
@RestController
@CrossOrigin
public class ContentController {
@Autowired
private ContentService contentService;
@GetMapping("/addToEs/{keyword}")
public boolean addToEs(@PathVariable("keyword") String keyword) throws IOException {
return contentService.addToEs(keyword);
}
@GetMapping("/searchPage/{keyword}/{pageNo}/{pageSize}")
public List
@PathVariable("pageNo") int pageNo,
@PathVariable("pageSize") int pageSize) throws IOException {
return contentService.searchPage(keyword, pageNo, pageSize);
}
}
utils
package com.ittao.utils;
import com.ittao.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
public class HtmlParseUtil {
// public static void main(String[] args) throws IOException {
// HtmlParseUtil.parseJd("黄涛").forEach(System.out::println);
//
// }
/**
* 解析京东页面 获取数据
* @param keyword
* @return
* @throws IOException
*/
public static List
//1.获取搜索url
String url = "https://search.jd.com/Search?keyword="+keyword+"&enc=utf-8";
//2.通过jsoup解析 获取文档对象
Document document = Jsoup.parse(new URL(url), 30000);
//3.接下来的操作和js一样了
Element j_goodsList = document.getElementById("J_goodsList");
Elements elements = j_goodsList.getElementsByTag("li");
ArrayList
for (Element element : elements) {
String img = element.getElementsByTag("img").eq(0).attr("src");
String price = element.getElementsByClass("p-price").text();
String title = element.getElementsByClass("p-name").eq(0).text();
Content content = new Content();
content.setTitle(title);
content.setImg(img);
content.setPrice(price);
contentArrayList.add(content);
}
return contentArrayList;
}
}
前端实现
首页
ElasticSearch的简单实战
第一个功能:从京东商城中爬取我们搜索的数据,存放到elasticsearch中
第二个功能:从elasticsearch中根据关键字查询我们的数据,进行展示
点我去搜索数据
点我去爬取数据
import Logosrc from "../assets/img/logo.png";
// Home page component: its only state is the imported logo image.
export default {
  name: "Home",
  components: {},
  // Reactive state: the logo asset imported at the top of the file.
  data: () => ({
    logosrc: Logosrc
  }),
  created() {},
  methods: {}
};
/* Styles for the home page. */
/* 20px font for elements with class "text". */
.text{
font-size: 20px;
}
/* Left-aligned text, pushed 450px in from the left edge. */
.link{
text-align: left;
margin-left: 450px;
}
/* Fixed 200px height for elements with class "logo". */
.logo{
height: 200px;
}
查询页面
!
import Logosrc from "../assets/img/logo.png";
// Search page component: queries the backend by keyword and keeps the hits in dataList.
export default {
  name: "Search",
  data() {
    return {
      logosrc: Logosrc,
      // Keyword typed by the user
      input: "",
      // Hits returned by the last search
      dataList: []
    };
  },
  components: {},
  created() {},
  methods: {
    /**
     * Requests page 1 (20 hits) for the current keyword from /searchPage and
     * stores the response in dataList. Alerts when the keyword is blank,
     * when nothing is found, or when the request fails.
     */
    searchData() {
      const keyword = this.input.trim();
      if (!keyword) {
        // A blank keyword would produce the malformed path "/searchPage//1/20".
        alert("请输入搜索关键字!");
        return;
      }
      // Encode the keyword so "/", "?", "#" and similar characters cannot break the path.
      this.$http
        .get(`/searchPage/${encodeURIComponent(keyword)}/${1}/${20}`)
        .then(res => {
          this.dataList = res.data;
          if (this.dataList.length < 1) {
            alert("暂无数据请重新搜索或者去生成数据!");
          }
        })
        .catch(err => {
          // Surface request failures instead of leaving an unhandled rejection.
          console.error(err);
          alert("搜索失败,请稍后重试!");
        });
    }
  }
};
/* Styles for the search page. */
/* Header and footer containers: centered text, 80px line height. */
.el-header,
.el-footer {
text-align: center;
line-height: 80px;
}
/* Main container: centered text, 800px line height. */
.el-main {
text-align: center;
line-height: 800px;
}
/* Bottom spacing for containers that are direct children of body. */
body > .el-container {
margin-bottom: 40px;
}
/* Line heights for asides inside the 5th, 6th and 7th sibling containers. */
.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
line-height: 260px;
}
.el-container:nth-child(7) .el-aside {
line-height: 320px;
}
/* Full-width, 1200px-high area with 50px vertical margin, centered horizontally. */
.content {
/* border: 1px solid sandybrown; */
width: 100%;
height: 1200px;
margin: 50px auto;
}
/* Full-width, 400px-high row, floated left. */
.row {
/* border: 1px solid saddlebrown; */
width: 100%;
height: 400px;
float: left;
}
/* Quarter-width column: four columns fit in one row. */
.col {
/* border: 1px solid tan; */
width: 25%;
height: 400px;
float: left;
}
/* Left-aligned with a 20px left margin. */
.image {
text-align: left;
margin-left: 20px;
}
/* Same alignment as .image, shown in red. */
.p-price {
text-align: left;
margin-left: 20px;
color: red;
}
/* Small 10px font. */
.p-title1{
font-size: 10px;
}
/* Left-aligned text. */
.p-commit {
text-align: left;
}
/* Left-aligned text. */
.p-shop {
text-align: left;
}
/* Remove list bullets. */
ul li {
list-style-type: none;
}
生成数据页面
import Logosrc from "../assets/img/logo.png";
// Data generation page component: asks the backend to scrape and index products for a keyword.
export default {
  name: "GenerateData",
  data() {
    return {
      // Keyword typed by the user
      input: "",
      logosrc: Logosrc
    };
  },
  components: {},
  created() {},
  methods: {
    /**
     * Calls /addToEs with the current keyword and reports the outcome through
     * this.$message: success when the backend returns true, a warning when it
     * returns anything else or the keyword is blank, an error when the request fails.
     */
    generateData() {
      const keyword = this.input.trim();
      if (!keyword) {
        // A blank keyword would produce the malformed path "/addToEs/".
        this.$message({
          type: "warning",
          message: "请输入关键字!"
        });
        return;
      }
      // Encode the keyword so "/", "?", "#" and similar characters cannot break the path.
      this.$http
        .get(`/addToEs/${encodeURIComponent(keyword)}`)
        .then(res => {
          if (res.data === true) {
            this.$message({
              type: "success",
              message: "生成数据成功,你可以去查询啦!"
            });
          } else {
            // Previously a false result gave the user no feedback at all.
            this.$message({
              type: "warning",
              message: "生成数据失败,请换个关键字再试!"
            });
          }
        })
        .catch(err => {
          // Surface request failures instead of leaving an unhandled rejection.
          console.error(err);
          this.$message({
            type: "error",
            message: "请求失败,请稍后重试!"
          });
        });
    }
  }
};
/* Styles for the data generation page. */
/* Header and footer containers: centered text, 80px line height. */
.el-header,
.el-footer {
text-align: center;
line-height: 80px;
}
/* Main container: centered text, 800px line height. */
.el-main {
text-align: center;
line-height: 800px;
}
/* Bottom spacing for containers that are direct children of body. */
body > .el-container {
margin-bottom: 40px;
}
/* Line heights for asides inside the 5th, 6th and 7th sibling containers. */
.el-container:nth-child(5) .el-aside,
.el-container:nth-child(6) .el-aside {
line-height: 260px;
}
.el-container:nth-child(7) .el-aside {
line-height: 320px;
}
route中index.js
import Vue from 'vue'
import VueRouter from 'vue-router'
import Search from '../views/Search.vue'
import Home from '../views/Home.vue'
import GenerateData from '../views/GenerateData.vue'
// Install the router plugin before any router instance is created.
Vue.use(VueRouter)

// Route table: home page, search page and data generation page.
const routes = [
  { path: '/', name: 'Home', component: Home },
  { path: '/search', name: 'Search', component: Search },
  { path: '/generateData', name: 'GenerateData', component: GenerateData }
]

// History-mode router rooted at the configured base URL.
export default new VueRouter({
  mode: 'history',
  base: process.env.BASE_URL,
  routes
})
main.js
import Vue from 'vue'
import App from './App.vue'
import router from './router'
import store from './store'
import ElementUI from 'element-ui'
import 'element-ui/lib/theme-chalk/index.css'
import axios from 'axios'
// Turn off the production tip Vue prints on startup.
Vue.config.productionTip = false

// Register Element UI globally.
Vue.use(ElementUI)

// `baseURL` is prepended to every request `url` unless that `url` is absolute.
axios.defaults.baseURL = 'http://localhost:8989'
// Expose axios to every component as this.$http.
Vue.prototype.$http = axios

// Create the root instance and mount it on #app.
new Vue({
  router,
  store,
  render: createElement => createElement(App)
}).$mount('#app')
3.总结
通过学习,对es的基本使用算是初步的掌握了.学习es的步骤如下
1.es的安装,尤其通过docker安装
2.es的简单restful api的使用,包括简单查询和复杂查询,通过kibana可视化界面操作
3.es的java客户端工具api学习,通过java语句去实现增删改查,其实本质上和查询语句类似,该有的方法都有。
4.最后通过es仿京东搜索的实战练习,具备了对es的基本运用能力