multi_sink.py

################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
import logging
import sys

from pyflink.table import (EnvironmentSettings, TableEnvironment, DataTypes)
from pyflink.table.udf import udf


def multi_sink():
    t_env = TableEnvironment.create(EnvironmentSettings.in_streaming_mode())

    table = t_env.from_elements(
        elements=[(1, 'Hello'), (2, 'World'), (3, "Flink"), (4, "PyFlink")],
        schema=['id', 'data'])

    # define the sink tables, both backed by the 'print' connector
    t_env.execute_sql("""
        CREATE TABLE first_sink (
            id BIGINT,
            data VARCHAR
        ) WITH (
            'connector' = 'print'
        )
    """)

    t_env.execute_sql("""
        CREATE TABLE second_sink (
            id BIGINT,
            data VARCHAR
        ) WITH (
            'connector' = 'print'
        )
    """)

    # create a statement set so that both inserts are submitted as a single job
    statement_set = t_env.create_statement_set()

    # emit the rows with id <= 3 to "first_sink" via a SQL statement
    statement_set.add_insert_sql("INSERT INTO first_sink SELECT * FROM %s WHERE id <= 3" % table)

    # emit the rows whose data contains "Flink" to "second_sink" via the Table API
    @udf(result_type=DataTypes.BOOLEAN())
    def contains_flink(data):
        return "Flink" in data

    second_table = table.where(contains_flink(table.data))
    statement_set.add_insert("second_sink", second_table)

    # execute the statement set
    # remove .wait() if submitting to a remote cluster, refer to
    # https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/python/faq/#wait-for-jobs-to-finish-when-executing-jobs-in-mini-cluster
    # for more details
    statement_set.execute().wait()


if __name__ == '__main__':
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
    multi_sink()
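
# Usage sketch: this example can be run locally or submitted to a cluster; the
# exact command depends on your Flink installation, for example:
#
#   python multi_sink.py           # run on the local mini cluster
#   flink run -py multi_sink.py    # submit to a running cluster (drop .wait() above)
#
# With the 'print' connector, each sink writes its rows to the task managers'
# standard output, e.g. "+I[1, Hello]" for an inserted record.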