重新设置Elasticsearch的mapping
CSDN 上有很多通过安装插件来修改 mapping 的方法,但我担心把数据搞坏,于是用 Python 写了个脚本:从旧的 index 中读取数据,再重新写入使用新 mapping 的 index。
代码如下
import re
from elasticsearch import Elasticsearch
from bs4 import BeautifulSoup
import time
# Copy documents from the old index into a new index so that they are
# indexed under a different mapping. The target index is expected to exist
# already (created with the desired mapping) before this script is run.
SOURCE_INDEX = 'smetrend'
TARGET_INDEX = 'smetrend_date'
DOC_TYPE = 'info'

# Upper bound on the number of documents fetched by the single search below.
MAX_DOCS = 6000

# Only these `_source` fields are carried over to the new index.
# NOTE(review): the target mapping shown alongside this script declares
# `publish_institution` rather than `institution` -- confirm which field
# name is intended before relying on that mapping for this field.
FIELDS = ('title', 'publish_date', 'institution', 'source_url', 'text_summary')

es = Elasticsearch([
    {'host': '192.168.1.126'},
    {'host': 'othernode', 'port': 9200, 'url_prefix': 'es', 'use_ssl': True},
])

# Fetch the documents in one request; at most MAX_DOCS hits come back.
result = es.search(index=SOURCE_INDEX, doc_type=DOC_TYPE, body={'size': MAX_DOCS})
hits = result['hits']['hits']

# `hits.total` is a plain integer on older clusters and an object of the
# form {"value": ..., "relation": ...} on Elasticsearch 7+.
total = result['hits']['total']
if isinstance(total, dict):
    total = total.get('value', 0)
if total > len(hits):
    # Make the truncation visible instead of silently dropping documents.
    print('WARNING: index %s holds %s documents but only %s were fetched; '
          'the rest will NOT be copied' % (SOURCE_INDEX, total, len(hits)))

num = 0  # number of documents successfully written to the target index
for hit in hits:
    time.sleep(0.01)  # presumably throttles writes to the cluster
    doc_id = hit['_id']
    source = hit['_source']  # the stored document

    # Pass a dict and let the client serialise it. Building the JSON text by
    # string interpolation breaks as soon as a value contains a quote, a
    # backslash or a newline. A missing field raises KeyError and stops the
    # run, as it did before.
    doc = {field: source[field] for field in FIELDS}

    try:
        # `create` refuses to overwrite an existing id, so re-running the
        # script reports already-copied documents as failures.
        created = es.create(index=TARGET_INDEX, doc_type=DOC_TYPE, id=doc_id, body=doc)
    except Exception as e:
        # Best effort: report the failing document and keep going.
        print('failed to copy document %s: %s' % (doc_id, e))
    else:
        print(created)
        num = num + 1
        print(num)
修改后的mapping(需要在运行上面的脚本之前,先用它创建新的 index smetrend_date):
PUT smetrend_date
{
"mappings": {
"info": {
"properties": {
"title": {
"store": true,
"type": "text",
"analyzer": "ik_max_word",
"fielddata": true
},
"publish_date": {
"store": true,
"type": "date",
"format": "yyyy-MM-dd"
},
"source_url": {
"store": true,
"type": "keyword"
},
"text_summary": {
"store": true,
"type": "text",
"analyzer": "ik_max_word",
"fielddata": true
},
"publish_institution": {
"store": true,
"type": "text",
"analyzer": "ik_max_word",
"fielddata": true
},
"sign": {
"store": true,
"type": "keyword"
}
}
}
}
}
根据时间查询的代码:
GET smetrend_date/info/_search
{
"query": {
"range": {
"publish_date": {
"gte": "2018-01-08",
"lte": "2019-12-12",
"format": "yyyy-MM-dd"
}
}
}
}