#!/bin/bash
export TIMESTAMP="$(date "+%Y-%m-%d-%H:%M:%S")"
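# Illustrative invocation, using the example values from the checks below:
#   NEO4J_ADDR=my-neo4j:6362 DATABASE=neo4j,system \
#   CLOUD_PROVIDER=gcp BUCKET=gs://my-backups ./backup.sh

# Required settings: fail fast when any of these is missing.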
if [ -z "$NEO4J_ADDR" ]; then
    echo "You must specify a NEO4J_ADDR env var with port, such as my-neo4j:6362"
    exit 1
fi
if [ -z "$DATABASE" ]; then
    echo "You must specify a DATABASE env var; comma-separated list of databases to back up, such as neo4j,system"
    exit 1
fi
if [ -z "$CLOUD_PROVIDER" ]; then
    echo "You must specify a CLOUD_PROVIDER env var"
    exit 1
fi
if [ -z "$BUCKET" ]; then
    echo "You must specify a BUCKET address such as (gs|s3)://my-backups"
    exit 1
fi
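
# Optional settings; defaults are applied when unset.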
if [ -z "$HEAP_SIZE" ]; then
    export HEAP_SIZE=2G
fi
if [ -z "$PAGE_CACHE" ]; then
    export PAGE_CACHE=2G
fi
if [ -z "$FALLBACK_TO_FULL" ]; then
    export FALLBACK_TO_FULL="true"
fi
if [ -z "$CHECK_CONSISTENCY" ]; then
    export CHECK_CONSISTENCY="true"
fi
if [ -z "$CHECK_INDEXES" ]; then
    export CHECK_INDEXES="true"
fi
if [ -z "$CHECK_GRAPH" ]; then
    export CHECK_GRAPH="true"
fi
if [ -z "$CHECK_LABEL_SCAN_STORE" ]; then
    export CHECK_LABEL_SCAN_STORE="true"
fi
if [ -z "$CHECK_PROPERTY_OWNERS" ]; then
    export CHECK_PROPERTY_OWNERS="false"
fi
if [ -z "$REMOVE_EXISTING_FILES" ]; then
    export REMOVE_EXISTING_FILES="true"
fi
if [ -z "$REMOVE_BACKUP_FILES" ]; then
    export REMOVE_BACKUP_FILES="true"
fi
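
# Empties the /backups scratch area so artifacts from earlier runs are not re-uploaded.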
function clean_backups_directory() {
    echo "Removing any existing files from /backups"
    rm -rfv /backups/*
}
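
# cloud_copy <backup_path> <database> <artifact_type>
# Uploads one artifact to $BUCKET under a per-database prefix; for
# artifact_type "backup" it also refreshes the "<db>-latest.tar.gz" pointer.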
function cloud_copy() {
    backup_path=$1
    database=$2
    artifact_type=$3
    bucket_path=""
    if [ "${BUCKET: -1}" = "/" ]; then
        bucket_path="${BUCKET%?}/$database/"
    else
        bucket_path="$BUCKET/$database/"
    fi
    echo "Pushing $backup_path -> $bucket_path"
    case $CLOUD_PROVIDER in
    aws)
        aws s3 cp "$backup_path" "$bucket_path"
        if [ "${artifact_type}" = "backup" ]; then
            aws s3 cp "$backup_path" "${bucket_path}${LATEST_POINTER}"
        fi
        ;;
    gcp)
        gsutil cp "$backup_path" "$bucket_path"
        if [ "${artifact_type}" = "backup" ]; then
            gsutil cp "$backup_path" "${bucket_path}${LATEST_POINTER}"
        fi
        ;;
    azure)
        # Container is specified via BUCKET input, which can contain a path, i.e.
        # my-container/foo
        # AZ CLI doesn't allow this so we need to split it into container and container path.
        IFS='/' read -r -a pathParts <<< "$BUCKET"
        CONTAINER=${pathParts[0]}
        # See: https://stackoverflow.com/a/10987027
        CONTAINER_PATH=${BUCKET#"$CONTAINER"}
        CONTAINER_FILE=$CONTAINER_PATH/$database/$(basename "$backup_path")
        # Remove all leading and doubled slashes to avoid creating empty folders in azure
        CONTAINER_FILE=$(echo "$CONTAINER_FILE" | sed 's|^/*||')
        CONTAINER_FILE=$(echo "$CONTAINER_FILE" | sed 's|//|/|g')
        echo "Azure storage blob copy to $CONTAINER :: $CONTAINER_FILE"
        az storage blob upload --container-name "$CONTAINER" \
            --file "$backup_path" \
            --name "$CONTAINER_FILE" \
            --account-name "$ACCOUNT_NAME" \
            --account-key "$ACCOUNT_KEY" \
            --overwrite "true"
        if [ "${artifact_type}" = "backup" ]; then
            latest_name=$CONTAINER_PATH/$database/${LATEST_POINTER}
            # Remove all leading and doubled slashes to avoid creating empty folders in azure
            latest_name=$(echo "$latest_name" | sed 's|^/*||')
            latest_name=$(echo "$latest_name" | sed 's|//|/|g')
            echo "Azure storage blob copy to $CONTAINER :: $latest_name"
            # --overwrite here too, so refreshing the latest pointer does not fail on reruns
            az storage blob upload --container-name "$CONTAINER" \
                --file "$backup_path" \
                --name "$latest_name" \
                --account-name "$ACCOUNT_NAME" \
                --account-key "$ACCOUNT_KEY" \
                --overwrite "true"
        fi
        ;;
    esac
}
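
# Archives the consistency-check report for the current database (using the
# REPORT_DIR, BACKUP_SET, and db globals set by backup_database) and ships it
# to the bucket.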
function upload_report() {
    echo "Archiving and Compressing -> /backups/$BACKUP_SET.report.tar.gz"
    tar -zcvf "/backups/$BACKUP_SET.report.tar.gz" "${REPORT_DIR}" --remove-files
    if [ $? -ne 0 ]; then
        echo "REPORT ARCHIVING OF ${REPORT_DIR} FAILED"
        exit 1
    fi
    echo "Zipped report size:"
    du -hs "/backups/$BACKUP_SET.report.tar.gz"
    cloud_copy "/backups/$BACKUP_SET.report.tar.gz" "$db" "report"
    if [ $? -ne 0 ]; then
        echo "Storage copy of report for ${REPORT_DIR} FAILED"
        exit 1
    else
        echo "Removing /backups/$BACKUP_SET.report.tar.gz"
        rm "/backups/$BACKUP_SET.report.tar.gz"
    fi
}
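
# Runs neo4j-admin backup for a single database, archives the result, and
# uploads both the backup set and its consistency report.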
function backup_database() {
    db=$1
    export REPORT_DIR="/backups/.report_$db"
    mkdir -p "${REPORT_DIR}"
    echo "Removing any existing files from ${REPORT_DIR}"
    rm -rfv "${REPORT_DIR}"/*
    export BACKUP_SET="$db-${TIMESTAMP}"
    export LATEST_POINTER="$db-latest.tar.gz"
    echo "=============== BACKUP $db ==================="
    echo "Beginning backup from $NEO4J_ADDR to /backups/$BACKUP_SET"
    echo "Using heap size $HEAP_SIZE and page cache $PAGE_CACHE"
    echo "FALLBACK_TO_FULL=$FALLBACK_TO_FULL, CHECK_CONSISTENCY=$CHECK_CONSISTENCY"
    echo "CHECK_GRAPH=$CHECK_GRAPH CHECK_INDEXES=$CHECK_INDEXES"
    echo "CHECK_LABEL_SCAN_STORE=$CHECK_LABEL_SCAN_STORE CHECK_PROPERTY_OWNERS=$CHECK_PROPERTY_OWNERS"
    echo "To storage bucket $BUCKET using $CLOUD_PROVIDER"
    echo "============================================================"
    neo4j-admin backup \
        --from="$NEO4J_ADDR" \
        --backup-dir=/backups \
        --database="$db" \
        --pagecache="$PAGE_CACHE" \
        --fallback-to-full="$FALLBACK_TO_FULL" \
        --check-consistency="$CHECK_CONSISTENCY" \
        --report-dir="${REPORT_DIR}" \
        --check-graph="$CHECK_GRAPH" \
        --check-indexes="$CHECK_INDEXES" \
        --check-label-scan-store="$CHECK_LABEL_SCAN_STORE" \
        --check-property-owners="$CHECK_PROPERTY_OWNERS" \
        --verbose \
        | tee "${REPORT_DIR}/backup.log"
    # Docs: see exit codes here: https://neo4j.com/docs/operations-manual/current/backup/performing/#backup-performing-command
    # $? would report tee's status here; PIPESTATUS[0] is neo4j-admin's exit code.
    backup_result=${PIPESTATUS[0]}
    case $backup_result in
    0) echo "Backup succeeded - $db" ;;
    1) echo "Backup FAILED - $db" ;;
    2) echo "Backup succeeded but consistency check failed - $db" ;;
    3) echo "Backup succeeded but consistency check found inconsistencies - $db" ;;
    esac
    echo "Backup report(s):"
    du -hs "${REPORT_DIR}"
    ls -l "${REPORT_DIR}"
    if [ "$backup_result" -eq 1 ]; then
        upload_report
        echo "Aborting other actions; backup failed"
        exit 1
    fi
    echo "Backup size:"
    du -hs "/backups/$db"
    echo "Final Backupset files"
    ls -l "/backups/$db"
    echo "Archiving and Compressing -> /backups/$BACKUP_SET.tar.gz"
    if [ "${REMOVE_BACKUP_FILES}" == "true" ]; then
        tar -zcvf "/backups/$BACKUP_SET.tar.gz" "/backups/$db" --remove-files
    else
        tar -zcvf "/backups/$BACKUP_SET.tar.gz" "/backups/$db"
    fi
    if [ $? -ne 0 ]; then
        echo "BACKUP ARCHIVING OF $db FAILED"
        exit 1
    fi
    echo "Zipped backup size:"
    du -hs "/backups/$BACKUP_SET.tar.gz"
    cloud_copy "/backups/$BACKUP_SET.tar.gz" "$db" "backup"
    if [ $? -ne 0 ]; then
        echo "Storage copy of backup for $db FAILED"
        exit 1
    else
        echo "Removing /backups/$BACKUP_SET.tar.gz"
        rm "/backups/$BACKUP_SET.tar.gz"
    fi
    upload_report
}
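
# Authenticates with GCP via a mounted service-account key, falling back to
# workload identity when no key file is present.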
function activate_gcp() {
    local credentials="/credentials/credentials"
    if [[ -f "${credentials}" ]]; then
        echo "Activating google credentials before beginning"
        gcloud auth activate-service-account --key-file "${credentials}"
        if [ $? -ne 0 ]; then
            echo "Credentials failed; no way to copy to google."
            exit 1
        fi
    else
        echo "No credentials file found. Assuming workload identity is configured"
    fi
}
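
# Installs a mounted AWS credentials file as the CLI config and verifies it,
# falling back to IRSA when no file is present.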
function activate_aws() {
    local credentials="/credentials/credentials"
    if [[ -f "${credentials}" ]]; then
        echo "Activating aws credentials before beginning"
        # Create and copy into the same directory (the original mkdir'd /root/.aws
        # but copied to ~/.aws, which differ when HOME is not /root)
        mkdir -p ~/.aws/
        cp /credentials/credentials ~/.aws/config
        if [ $? -ne 0 ]; then
            echo "Credentials failed; no way to copy to aws."
            exit 1
        fi
        aws sts get-caller-identity
        if [ $? -ne 0 ]; then
            echo "Credentials failed; no way to copy to aws."
            exit 1
        fi
    else
        echo "No credentials file found. Assuming IAM Role for Service Account - IRSA is configured"
    fi
}
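
# Sources ACCOUNT_NAME/ACCOUNT_KEY export statements from the credentials secret.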
function activate_azure() {
    echo "Activating azure credentials before beginning"
    source "/credentials/credentials"
    if [ -z "$ACCOUNT_NAME" ]; then
        echo "You must specify an ACCOUNT_NAME export statement in the credentials secret naming the storage account where backups are stored"
        exit 1
    fi
    if [ -z "$ACCOUNT_KEY" ]; then
        echo "You must specify an ACCOUNT_KEY export statement in the credentials secret with the key for the storage account where backups are stored"
        exit 1
    fi
}
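
# Main flow: optionally clean the scratch area, authenticate with the chosen
# provider, then back up each requested database in turn.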
if [ "${REMOVE_EXISTING_FILES}" == "true" ]; then
    clean_backups_directory
fi
case $CLOUD_PROVIDER in
azure)
    activate_azure
    ;;
aws)
    activate_aws
    ;;
gcp)
    activate_gcp
    ;;
*)
    echo "Invalid CLOUD_PROVIDER=$CLOUD_PROVIDER"
    echo "You must set CLOUD_PROVIDER to be one of (aws|gcp|azure)"
    exit 1
    ;;
esac
# Split DATABASE by comma, without leaking a modified IFS into later commands
IFS="," read -r -a databases <<< "$DATABASE"
for db in "${databases[@]}"; do
    backup_database "$db"
done
echo "All finished"
exit 0