# elasticsearch.py
  1. ################################################################################
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. ################################################################################
  18. import logging
  19. import sys
  20. from pyflink.datastream.connectors.elasticsearch import Elasticsearch6SinkBuilder, \
  21. Elasticsearch7SinkBuilder, FlushBackoffType, ElasticsearchEmitter
  22. from pyflink.common import Types
  23. from pyflink.datastream import StreamExecutionEnvironment
  24. from pyflink.datastream.connectors import DeliveryGuarantee
  25. def write_to_es6(env):
  26. ELASTICSEARCH_SQL_CONNECTOR_PATH = \
  27. 'file:///path/to/flink-sql-connector-elasticsearch6-1.16.0.jar'
  28. env.add_jars(ELASTICSEARCH_SQL_CONNECTOR_PATH)
  29. ds = env.from_collection(
  30. [{'name': 'ada', 'id': '1'}, {'name': 'luna', 'id': '2'}],
  31. type_info=Types.MAP(Types.STRING(), Types.STRING()))
  32. es_sink = Elasticsearch6SinkBuilder() \
  33. .set_emitter(ElasticsearchEmitter.static_index('foo', 'id', 'bar')) \
  34. .set_hosts(['localhost:9200']) \
  35. .set_delivery_guarantee(DeliveryGuarantee.AT_LEAST_ONCE) \
  36. .set_bulk_flush_max_actions(1) \
  37. .set_bulk_flush_max_size_mb(2) \
  38. .set_bulk_flush_interval(1000) \
  39. .set_bulk_flush_backoff_strategy(FlushBackoffType.CONSTANT, 3, 3000) \
  40. .set_connection_username('foo') \
  41. .set_connection_password('bar') \
  42. .set_connection_path_prefix('foo-bar') \
  43. .set_connection_request_timeout(30000) \
  44. .set_connection_timeout(31000) \
  45. .set_socket_timeout(32000) \
  46. .build()
  47. ds.sink_to(es_sink).name('es6 sink')
  48. env.execute()
  49. def write_to_es6_dynamic_index(env):
  50. ELASTICSEARCH_SQL_CONNECTOR_PATH = \
  51. 'file:///path/to/flink-sql-connector-elasticsearch6-1.16.0.jar'
  52. env.add_jars(ELASTICSEARCH_SQL_CONNECTOR_PATH)
  53. ds = env.from_collection(
  54. [{'name': 'ada', 'id': '1'}, {'name': 'luna', 'id': '2'}],
  55. type_info=Types.MAP(Types.STRING(), Types.STRING()))
  56. es_sink = Elasticsearch6SinkBuilder() \
  57. .set_emitter(ElasticsearchEmitter.dynamic_index('name', 'id', 'bar')) \
  58. .set_hosts(['localhost:9200']) \
  59. .build()
  60. ds.sink_to(es_sink).name('es6 dynamic index sink')
  61. env.execute()
  62. def write_to_es7(env):
  63. ELASTICSEARCH_SQL_CONNECTOR_PATH = \
  64. 'file:///path/to/flink-sql-connector-elasticsearch7-1.16.0.jar'
  65. env.add_jars(ELASTICSEARCH_SQL_CONNECTOR_PATH)
  66. ds = env.from_collection(
  67. [{'name': 'ada', 'id': '1'}, {'name': 'luna', 'id': '2'}],
  68. type_info=Types.MAP(Types.STRING(), Types.STRING()))
  69. es7_sink = Elasticsearch7SinkBuilder() \
  70. .set_emitter(ElasticsearchEmitter.static_index('foo', 'id')) \
  71. .set_hosts(['localhost:9200']) \
  72. .set_delivery_guarantee(DeliveryGuarantee.AT_LEAST_ONCE) \
  73. .set_bulk_flush_max_actions(1) \
  74. .set_bulk_flush_max_size_mb(2) \
  75. .set_bulk_flush_interval(1000) \
  76. .set_bulk_flush_backoff_strategy(FlushBackoffType.CONSTANT, 3, 3000) \
  77. .set_connection_username('foo') \
  78. .set_connection_password('bar') \
  79. .set_connection_path_prefix('foo-bar') \
  80. .set_connection_request_timeout(30000) \
  81. .set_connection_timeout(31000) \
  82. .set_socket_timeout(32000) \
  83. .build()
  84. ds.sink_to(es7_sink).name('es7 sink')
  85. env.execute()
  86. def write_to_es7_dynamic_index(env):
  87. ELASTICSEARCH_SQL_CONNECTOR_PATH = \
  88. 'file:///path/to/flink-sql-connector-elasticsearch7-1.16.0.jar'
  89. env.add_jars(ELASTICSEARCH_SQL_CONNECTOR_PATH)
  90. ds = env.from_collection(
  91. [{'name': 'ada', 'id': '1'}, {'name': 'luna', 'id': '2'}],
  92. type_info=Types.MAP(Types.STRING(), Types.STRING()))
  93. es7_sink = Elasticsearch7SinkBuilder() \
  94. .set_emitter(ElasticsearchEmitter.dynamic_index('name', 'id')) \
  95. .set_hosts(['localhost:9200']) \
  96. .build()
  97. ds.sink_to(es7_sink).name('es7 dynamic index sink')
  98. env.execute()
if __name__ == '__main__':
    # Route log output to stdout so it is visible when the example is
    # run directly from a terminal.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
    env = StreamExecutionEnvironment.get_execution_environment()
    # Parallelism 1 keeps this demo job small and its output ordered.
    env.set_parallelism(1)
    # Each write_to_* call submits and runs a separate blocking Flink job
    # (every function ends with env.execute()), so these run sequentially.
    print("start writing data to elasticsearch6")
    write_to_es6(env)
    write_to_es6_dynamic_index(env)
    print("start writing data to elasticsearch7")
    write_to_es7(env)
    write_to_es7_dynamic_index(env)