From 95faf6e20472ec089b46cd8979bfd5579ed9fc09 Mon Sep 17 00:00:00 2001 From: alavi_team_user2 Date: Sun, 14 Sep 2025 20:02:12 +0330 Subject: [PATCH] first commit --- .env | 41 ++++++ .env.example | 41 ++++++ .gitignore | 4 + docker-compose.yml | 220 +++++++++++++++++++++++++++++++++ exec | 114 +++++++++++++++++ log_project/.env.example | 6 + log_project/docker-compose.yml | 21 ++++ log_project/filebeat.yml | 36 ++++++ log_project/log_project.py | 67 ++++++++++ log_project/vector.yaml | 42 +++++++ metricbeat.yml | 56 +++++++++ work_with_elastic.py | 53 ++++++++ 12 files changed, 701 insertions(+) create mode 100644 .env create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 docker-compose.yml create mode 100644 exec create mode 100644 log_project/.env.example create mode 100644 log_project/docker-compose.yml create mode 100755 log_project/filebeat.yml create mode 100644 log_project/log_project.py create mode 100644 log_project/vector.yaml create mode 100644 metricbeat.yml create mode 100644 work_with_elastic.py diff --git a/.env b/.env new file mode 100644 index 0000000..4bafb7c --- /dev/null +++ b/.env @@ -0,0 +1,41 @@ +# Project namespace (defaults to the current folder name if not set) +#COMPOSE_PROJECT_NAME=myproject + + +# Password for the 'elastic' user (at least 6 characters) +ELASTIC_PASSWORD=lSsAwEE1t1MacY + + +# Password for the 'kibana_system' user (at least 6 characters) +KIBANA_PASSWORD=lSsAwEE1t1MacY + + +# Version of Elastic products +STACK_VERSION=9.1.3 + + +# Set the cluster name +CLUSTER_NAME=docker-cluster + + +# Set to 'basic' or 'trial' to automatically start the 30-day trial +LICENSE=basic +#LICENSE=trial + + +# Port to expose Elasticsearch HTTP API to the host +ES_PORT=3113 + + +# Port to expose Kibana to the host +KIBANA_PORT=3112 + + +# Increase or decrease based on the available host memory (in bytes) +ES_MEM_LIMIT=6073741824 +KB_MEM_LIMIT=2073741824 +LS_MEM_LIMIT=1073741824 + + +# SAMPLE Predefined Key only to be 
used in POC environments +ENCRYPTION_KEY=FvpPWhx2XNcOE5FC52sLFlPjNSXnpGapQuo4S9jMvO043nfLHpzOAcwwGd6KiBoP diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1d079d0 --- /dev/null +++ b/.env.example @@ -0,0 +1,41 @@ +# Project namespace (defaults to the current folder name if not set) +#COMPOSE_PROJECT_NAME=myproject + + +# Password for the 'elastic' user (at least 6 characters) +ELASTIC_PASSWORD=PASSWORD + + +# Password for the 'kibana_system' user (at least 6 characters) +KIBANA_PASSWORD=PASSWORD + + +# Version of Elastic products +STACK_VERSION=9.1.3 + + +# Set the cluster name +CLUSTER_NAME=docker-cluster + + +# Set to 'basic' or 'trial' to automatically start the 30-day trial +LICENSE=basic +#LICENSE=trial + + +# Port to expose Elasticsearch HTTP API to the host +ES_PORT=3113 + + +# Port to expose Kibana to the host +KIBANA_PORT=3112 + + +# Increase or decrease based on the available host memory (in bytes) +ES_MEM_LIMIT=6073741824 +KB_MEM_LIMIT=2073741824 +LS_MEM_LIMIT=1073741824 + + +# SAMPLE Predefined Key only to be used in POC environments +ENCRYPTION_KEY=ENCRYPTION_KEY diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..35c6d83 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +log_project/log +ca.crt +log_project/filebeatdata01 +log_project/.env diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..31e98d4 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,220 @@ +version: "3.8" + +volumes: + certs: + driver: local + esdata01: + driver: local + kibanadata: + driver: local + metricbeatdata01: + driver: local + filebeatdata01: + driver: local + logstashdata01: + driver: local + +networks: + default: + name: elastic + external: false + +services: + setup: + image: elasticsearch:${STACK_VERSION} + volumes: + - certs:/usr/share/elasticsearch/config/certs + user: "0" + command: > + bash -c ' + if [ x${ELASTIC_PASSWORD} == x ]; then + echo "Set the ELASTIC_PASSWORD environment variable 
in the .env file"; + exit 1; + elif [ x${KIBANA_PASSWORD} == x ]; then + echo "Set the KIBANA_PASSWORD environment variable in the .env file"; + exit 1; + fi; + if [ ! -f config/certs/ca.zip ]; then + echo "Creating CA"; + bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip; + unzip config/certs/ca.zip -d config/certs; + fi; + if [ ! -f config/certs/certs.zip ]; then + echo "Creating certs"; + echo -ne \ + "instances:\n"\ + " - name: es01\n"\ + " dns:\n"\ + " - es01\n"\ + " - localhost\n"\ + " ip:\n"\ + " - 127.0.0.1\n"\ + " - name: kibana\n"\ + " dns:\n"\ + " - kibana\n"\ + " - localhost\n"\ + " ip:\n"\ + " - 127.0.0.1\n"\ + > config/certs/instances.yml; + bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key; + unzip config/certs/certs.zip -d config/certs; + fi; + echo "Setting file permissions" + chown -R root:root config/certs; + find . -type d -exec chmod 750 \{\} \;; + find . 
-type f -exec chmod 640 \{\} \;; + echo "Waiting for Elasticsearch availability"; + until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done; + echo "Setting kibana_system password"; + until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" https://es01:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done; + echo "All done!"; + ' + healthcheck: + test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"] + interval: 1s + timeout: 5s + retries: 120 + + es01: + depends_on: + setup: + condition: service_healthy + image: elasticsearch:${STACK_VERSION} + labels: + co.elastic.logs/module: elasticsearch + volumes: + - certs:/usr/share/elasticsearch/config/certs + - esdata01:/usr/share/elasticsearch/data + ports: + - ${ES_PORT}:9200 + environment: + - node.name=es01 + - cluster.name=${CLUSTER_NAME} + - discovery.type=single-node + - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} + - bootstrap.memory_lock=true + - xpack.security.enabled=true + - xpack.security.http.ssl.enabled=true + - xpack.security.http.ssl.key=certs/es01/es01.key + - xpack.security.http.ssl.certificate=certs/es01/es01.crt + - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt + - xpack.security.transport.ssl.enabled=true + - xpack.security.transport.ssl.key=certs/es01/es01.key + - xpack.security.transport.ssl.certificate=certs/es01/es01.crt + - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt + - xpack.security.transport.ssl.verification_mode=certificate + - xpack.license.self_generated.type=${LICENSE} + mem_limit: ${ES_MEM_LIMIT} + ulimits: + memlock: + soft: -1 + hard: -1 + healthcheck: + test: + [ + "CMD-SHELL", + "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'", + ] + interval: 10s + timeout: 10s + retries: 120 + 
+ kibana: + depends_on: + es01: + condition: service_healthy + image: kibana:${STACK_VERSION} + labels: + co.elastic.logs/module: kibana + volumes: + - certs:/usr/share/kibana/config/certs + - kibanadata:/usr/share/kibana/data + ports: + - ${KIBANA_PORT}:5601 + environment: + - SERVERNAME=kibana + - ELASTICSEARCH_HOSTS=https://es01:9200 + - ELASTICSEARCH_USERNAME=kibana_system + - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD} + - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt + - XPACK_SECURITY_ENCRYPTIONKEY=${ENCRYPTION_KEY} + - XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=${ENCRYPTION_KEY} + - XPACK_REPORTING_ENCRYPTIONKEY=${ENCRYPTION_KEY} + mem_limit: ${KB_MEM_LIMIT} + healthcheck: + test: + [ + "CMD-SHELL", + "curl -s -I http://localhost:5601 | grep -q 'HTTP/1.1 302 Found'", + ] + interval: 10s + timeout: 10s + retries: 120 + + metricbeat01: + depends_on: + es01: + condition: service_healthy + kibana: + condition: service_healthy + image: elastic/metricbeat:${STACK_VERSION} + user: root + volumes: + - certs:/usr/share/metricbeat/certs + - metricbeatdata01:/usr/share/metricbeat/data + - "./metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro" + - "/var/run/docker.sock:/var/run/docker.sock:ro" + - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro" + - "/proc:/hostfs/proc:ro" + - "/:/hostfs:ro" + environment: + - ELASTIC_USER=elastic + - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} + - ELASTIC_HOSTS=https://es01:9200 + - KIBANA_HOSTS=http://kibana:5601 + # - LOGSTASH_HOSTS=http://logstash01:9600 + command: + - --strict.perms=false + + # filebeat01: + # depends_on: + # es01: + # condition: service_healthy + # image: elastic/filebeat:${STACK_VERSION} + # user: root + # volumes: + # - certs:/usr/share/filebeat/certs + # - filebeatdata01:/usr/share/filebeat/data + # - "./filebeat_ingest_data/:/usr/share/filebeat/ingest_data/" + # - "./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro" + # - "/var/lib/docker/containers:/var/lib/docker/containers:ro" + # - 
"/var/run/docker.sock:/var/run/docker.sock:ro" + # environment: + # - ELASTIC_USER=elastic + # - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} + # - ELASTIC_HOSTS=https://es01:9200 + # - KIBANA_HOSTS=http://kibana:5601 + # - LOGSTASH_HOSTS=http://logstash01:9600 + # command: + # - --strict.perms=false + + # logstash01: + # depends_on: + # es01: + # condition: service_healthy + # kibana: + # condition: service_healthy + # image: docker.elastic.co/logstash/logstash:${STACK_VERSION} + # labels: + # co.elastic.logs/module: logstash + # user: root + # volumes: + # - certs:/usr/share/logstash/certs + # - logstashdata01:/usr/share/logstash/data + # - "./logstash_ingest_data/:/usr/share/logstash/ingest_data/" + # - "./logstash.conf:/usr/share/logstash/pipeline/logstash.conf:ro" + # environment: + # - xpack.monitoring.enabled=false + # - ELASTIC_USER=elastic + # - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} + # - ELASTIC_HOSTS=https://es01:9200 \ No newline at end of file diff --git a/exec b/exec new file mode 100644 index 0000000..03e6b66 --- /dev/null +++ b/exec @@ -0,0 +1,114 @@ +docker network create elastic + + + +# setup +docker run --rm -it \ + --net elastic \ + --env-file .env \ + --name setup \ + --user 0 \ + -v certs:/usr/share/elasticsearch/config/certs \ + --entrypoint bash \ + elasticsearch:9.1.3 + + + + +docker run -it --rm \ + --net elastic \ + --env-file .env \ + --name es01 \ + --label co.elastic.logs/module=elasticsearch \ + -v certs:/usr/share/elasticsearch/config/certs \ + -v esdata01:/usr/share/elasticsearch/data \ + -p 9200:9200 \ + --env node.name=es01 \ + --env cluster.name=docker-cluster \ + --env discovery.type=single-node \ + --env ELASTIC_PASSWORD=lSsAwEE1t1MacY \ + --env bootstrap.memory_lock=true \ + --env xpack.security.enabled=true \ + --env xpack.security.http.ssl.enabled=true \ + --env xpack.security.http.ssl.key=certs/es01/es01.key \ + --env xpack.security.http.ssl.certificate=certs/es01/es01.crt \ + --env 
xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt \ + --env xpack.security.transport.ssl.enabled=true \ + --env xpack.security.transport.ssl.key=certs/es01/es01.key \ + --env xpack.security.transport.ssl.certificate=certs/es01/es01.crt \ + --env xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt \ + --env xpack.security.transport.ssl.verification_mode=certificate \ + --env xpack.license.self_generated.type=basic \ + --memory=6gb \ + --ulimit memlock=-1:-1 \ + elasticsearch:9.1.3 + +نکته‌ها: + + + +docker cp es01:/usr/share/elasticsearch/config/certs/ca/ca.crt . +docker cp elk_stack-es01-1:/usr/share/elasticsearch/config/certs/ca/ca.crt . +curl --cacert ./ca.crt -u elastic:lSsAwEE1t1MacY https://localhost:9200 + + + + +docker run -it --rm \ + --net elastic \ + --env-file .env \ + --name kibana \ + --label co.elastic.logs/module=kibana \ + -v certs:/usr/share/kibana/config/certs \ + -v kibanadata:/usr/share/kibana/data \ + -p 3112:5601 \ + -e SERVERNAME=kibana \ + -e ELASTICSEARCH_HOSTS=https://es01:9200 \ + -e ELASTICSEARCH_USERNAME=kibana_system \ + -e ELASTICSEARCH_PASSWORD=lSsAwEE1t1MacY \ + -e ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt \ + -e XPACK_SECURITY_ENCRYPTIONKEY=FvpPWhx2XNcOE5FC52sLFlPjNSXnpGapQuo4S9jMvO043nfLHpzOAcwwGd6KiBoP \ + -e XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=FvpPWhx2XNcOE5FC52sLFlPjNSXnpGapQuo4S9jMvO043nfLHpzOAcwwGd6KiBoP \ + -e XPACK_REPORTING_ENCRYPTIONKEY=FvpPWhx2XNcOE5FC52sLFlPjNSXnpGapQuo4S9jMvO043nfLHpzOAcwwGd6KiBoP \ + --memory 2073741824 \ + kibana:9.1.3 + + +# docker run -it --rm --name kib01 --net elastic -p 3112:5601 \ +# -v certs:/usr/share/kibana/config/certs \ +# -v kibanadata:/usr/share/kibana/data \ +# kibana:9.1.3 + + + + + + +docker run -it --rm --name filebeat \ + --user root \ + -v /var/lib/docker/volumes/elk_stack_certs/_data/ca/ca.crt:/usr/share/filebeat/certs/ca/ca.crt \ + -v ./filebeatdata01:/usr/share/filebeat/data \ + -v 
"./log/:/usr/share/filebeat/ingest_data/" \ + -v "./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro" \ + --env-file .env \ + --entrypoint bash --add-host=es01:host-gateway \ + elastic/filebeat:9.1.3 + + + +filebeat -e --strict.perms=false + +# https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-filestream#filebeat-input-filestream-ignore-older + + + +docker run --rm -it \ + -v $PWD/vector.yaml:/etc/vector/vector.yaml:ro \ + -v "./log/:/var/log/" \ + -v /var/lib/docker/volumes/elk_stack_certs/_data/ca/ca.crt:/certs/ca/ca.crt \ + -p 3114:8686 \ + --name vector \ + --add-host=es01:host-gateway \ + --env-file .env \ + timberio/vector:0.49.0-debian + diff --git a/log_project/.env.example b/log_project/.env.example new file mode 100644 index 0000000..23028ed --- /dev/null +++ b/log_project/.env.example @@ -0,0 +1,6 @@ +# STACK_VERSION=9.1.3 +# KIBANA_HOSTS="http://es01:3112" +ELASTIC_USER=elastic +ELASTIC_HOSTS="https://es01:3113" +ELASTIC_PASSWORD=PASSWORD +# VECTOR_LOG=debug diff --git a/log_project/docker-compose.yml b/log_project/docker-compose.yml new file mode 100644 index 0000000..d219f2e --- /dev/null +++ b/log_project/docker-compose.yml @@ -0,0 +1,21 @@ +version: "3.8" + +services: + vector: + image: timberio/vector:0.49.0-debian + container_name: vector_container_for_logs + restart: unless-stopped + # ports: + # - "3114:8686" + volumes: + - ./vector.yaml:/etc/vector/vector.yaml:ro + - ./log/:/var/log/ + - /var/lib/docker/volumes/elk_stack_certs/_data/ca/ca.crt:/certs/ca/ca.crt + extra_hosts: + - "es01:host-gateway" + environment: + - ELASTIC_USER=${ELASTIC_USER} + - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} + - ELASTIC_HOSTS=${ELASTIC_HOSTS} + tty: true + stdin_open: true diff --git a/log_project/filebeat.yml b/log_project/filebeat.yml new file mode 100755 index 0000000..160a33d --- /dev/null +++ b/log_project/filebeat.yml @@ -0,0 +1,36 @@ +filebeat.inputs: +- type: filestream + id: default-filestream + paths: + - ingest_data/*.log + 
import logging
import os
import json
from datetime import datetime, timezone
from logging.handlers import RotatingFileHandler
import logging.config

# Log directory lives next to this script; create it on first run.
log_dir = os.path.join(os.path.dirname(__file__), 'log')
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

log_file = os.path.join(log_dir, 'app.log')
logger = logging.getLogger("structured_logger")
logger.setLevel(logging.INFO)

# Rotate at ~3 MB, keeping 5 backup files.
handler = RotatingFileHandler(log_file, maxBytes=3 * 1024 * 1024, backupCount=5)


class JsonFormatter(logging.Formatter):
    """Serialize each log record as one JSON object per line.

    Emits a fixed schema (timestamp/level/message plus the optional
    structured fields user_id/action/status supplied via ``extra=``),
    which the Filebeat/Vector pipelines downstream parse as JSON.
    """

    def format(self, record):
        log_record = {
            # Timezone-aware UTC timestamp, ISO-8601 formatted.
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": record.levelname,
            "message": record.getMessage(),
            # Custom fields: absent unless the caller passed them in `extra`.
            "user_id": getattr(record, "user_id", None),
            "action": getattr(record, "action", None),
            "status": getattr(record, "status", None),
        }
        # ensure_ascii=False keeps Persian message text readable in the file.
        return json.dumps(log_record, ensure_ascii=False)


handler.setFormatter(JsonFormatter())
logger.addHandler(handler)


def generate_logs(x):
    """Emit one batch of four sample structured logs; `x` seeds the user_id values."""
    base = x * 4
    logger.info("برنامه شروع شد", extra={"user_id": base, "action": "start", "status": "ok"})
    logger.warning("هشدار شبیه‌سازی شد", extra={"user_id": base + 1, "action": "warn_test", "status": "warning"})
    logger.error("خطا شبیه‌سازی شد", extra={"user_id": base + 2, "action": "error_test", "status": "error"})
    logger.info("لاگ سفارشی", extra={"user_id": base + 3, "action": "custom", "status": "ok"})


def main():
    # Offset keeps the generated user_ids away from earlier runs' ranges.
    offset = 208000 // 4
    for i in range(1):
        generate_logs(i + offset)


if __name__ == "__main__":
    main()
from elasticsearch import Elasticsearch, helpers
from elasticsearch.helpers import BulkIndexError

# NOTE(review): this API key is a live credential committed to source control —
# rotate it and load it from the environment instead of hard-coding it here.
client = Elasticsearch(
    "https://127.0.0.1:3113",
    api_key="cnpEaUxaa0JPYlRRZTVkQmRva2I6VW5wWFpLYjRIMW9KT3FKM25hZW45UQ==",
    ca_certs="./ca.crt",
)

index_name = "search-87wk"

# Map `text` as a full-text (analyzed) field on the target index.
mappings = {
    "properties": {
        "text": {
            "type": "text"
        }
    }
}

mapping_response = client.indices.put_mapping(index=index_name, body=mappings)
print(mapping_response)

# Sample documents to bulk-index.
docs = [
    {
        "text": "Yellowstone National Park is one of the largest national parks in the United States. It ranges from the Wyoming to Montana and Idaho, and contains an area of 2,219,791 acress across three different states. Its most famous for hosting the geyser Old Faithful and is centered on the Yellowstone Caldera, the largest super volcano on the American continent. Yellowstone is host to hundreds of species of animal, many of which are endangered or threatened. Most notably, it contains free-ranging herds of bison and elk, alongside bears, cougars and wolves. The national park receives over 4.5 million visitors annually and is a UNESCO World Heritage Site."
    },
    {
        "text": "Yosemite National Park is a United States National Park, covering over 750,000 acres of land in California. A UNESCO World Heritage Site, the park is best known for its granite cliffs, waterfalls and giant sequoia trees. Yosemite hosts over four million visitors in most years, with a peak of five million visitors in 2016. The park is home to a diverse range of wildlife, including mule deer, black bears, and the endangered Sierra Nevada bighorn sheep. The park has 1,200 square miles of wilderness, and is a popular destination for rock climbers, with over 3,000 feet of vertical granite to climb. Its most famous and cliff is the El Capitan, a 3,000 feet monolith along its tallest face."
    },
    {
        "text": "Rocky Mountain National Park is one of the most popular national parks in the United States. It receives over 4.5 million visitors annually, and is known for its mountainous terrain, including Longs Peak, which is the highest peak in the park. The park is home to a variety of wildlife, including elk, mule deer, moose, and bighorn sheep. The park is also home to a variety of ecosystems, including montane, subalpine, and alpine tundra. The park is a popular destination for hiking, camping, and wildlife viewing, and is a UNESCO World Heritage Site."
    },
]

# Generous timeout so machine-learning model loading / semantic ingestion can complete.
ingestion_timeout = 300

try:
    bulk_response = helpers.bulk(
        client.options(request_timeout=ingestion_timeout),
        docs,
        index=index_name,
    )
except BulkIndexError as e:
    print("Documents failed to index:")
    for error in e.errors:
        print(error)
else:
    # Only print on success: `bulk_response` is unbound when the bulk call
    # raised, so printing it unconditionally used to crash with NameError.
    print(bulk_response)