diff --git a/dump_schema.sh b/dump_schema.sh
new file mode 100644
index 000000000..57f405062
--- /dev/null
+++ b/dump_schema.sh
@@ -0,0 +1,31 @@
+# Run this against the old database you are dumping from.
+# After you exec a shell in the database pod, run this script.
+
+DB_NAME="pals-production-hyku" # Replace with your database name
+DB_USER="postgres"             # Replace with your database username
+DUMP_DIR="/tmp/postgres"       # Replace with your desired directory for dump files
+
+# Create the dump directory if it doesn't exist
+mkdir -p "$DUMP_DIR"
+
+# Get the list of schemas, excluding system schemas
+SCHEMAS=$(psql -U "$DB_USER" -d "$DB_NAME" -t -c "SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT IN ('pg_catalog', 'information_schema', 'pg_toast');")
+
+# Loop through each schema and dump it
+for SCHEMA in $SCHEMAS; do
+  # Remove leading/trailing whitespace from the schema name
+  SCHEMA=$(echo "$SCHEMA" | xargs)
+
+  # Output message
+  echo "Dumping schema: $SCHEMA"
+
+  # Dump the schema in custom format
+  pg_dump -U "$DB_USER" -d "$DB_NAME" -n "$SCHEMA" -F c -f "$DUMP_DIR/${SCHEMA}.dump"
+
+  # Check whether the dump was successful
+  if [ $? -eq 0 ]; then
+    echo "Successfully dumped schema: $SCHEMA"
+  else
+    echo "Failed to dump schema: $SCHEMA" >&2
+  fi
+done
diff --git a/lib/tasks/delete_old_searches.rake b/lib/tasks/delete_old_searches.rake
new file mode 100644
index 000000000..a3c1e77a0
--- /dev/null
+++ b/lib/tasks/delete_old_searches.rake
@@ -0,0 +1,40 @@
+namespace :search do
+  desc 'Delete old unsaved searches for multiple tenants in batches'
+  task delete_old_searches: :environment do
+    days_old = ENV['DAYS_OLD'].to_i
+    batch_size = ENV['BATCH_SIZE'].to_i
+    tenant_ids = Account.pluck(:id)
+
+    if days_old <= 0 || batch_size <= 0
+      puts 'DAYS_OLD and BATCH_SIZE must be positive integers.'
+      exit 1
+    end
+
+    tenant_ids.each do |tenant_id|
+      account = Account.find_by(id: tenant_id)
+      next if account.nil?
+
+      # Switch tenant context
+      switch!(account)
+
+      # Delete old unsaved searches in batches
+      loop do
+        old_searches = Search.where(['created_at < ? AND user_id IS NULL', Time.zone.today - days_old])
+                             .limit(batch_size)
+
+        break if old_searches.empty?
+
+        deleted_count = old_searches.delete_all
+        puts "Deleted #{deleted_count} searches in batch for tenant #{tenant_id}."
+        sleep(1) # Optional: add a short delay to reduce load
+      end
+
+      # Perform VACUUM FULL on the searches table in the tenant's schema
+      schema_name = account.tenant
+      ActiveRecord::Base.connection.execute("VACUUM FULL \"#{schema_name}\".searches;")
+      puts "Performed VACUUM FULL on searches table for tenant #{tenant_id}."
+
+      puts "Completed deletion of searches older than #{days_old} days for tenant #{tenant_id}."
+    end
+  end
+end
diff --git a/lib/tasks/pg_dump_by_schema.rake b/lib/tasks/pg_dump_by_schema.rake
new file mode 100644
index 000000000..9066072bb
--- /dev/null
+++ b/lib/tasks/pg_dump_by_schema.rake
@@ -0,0 +1,59 @@
+namespace :db do
+  desc 'Dump each tenant schema to a separate file under tmp/ after checking schema size'
+  task dump_schemas: :environment do
+    # Fetch database configuration from the Rails environment
+    db_config = Rails.configuration.database_configuration[Rails.env]
+
+    pg_password = db_config['password']
+    pg_user = db_config['username']
+    pg_host = db_config['host']
+    pg_port = db_config['port'] || '5432'
+    db_name = db_config['database']
+
+    # Set the PGPASSWORD environment variable for pg_dump
+    ENV['PGPASSWORD'] = pg_password
+
+    # Print the connection settings for debugging (without revealing the password)
+    puts "PGPASSWORD: #{pg_password.present? ? '[FILTERED]' : '[not set]'}"
+    puts "PGUSER: #{pg_user}"
+    puts "PGHOST: #{pg_host}"
+    puts "PGPORT: #{pg_port}"
+    puts "DB_NAME: #{db_name}"
+
+    # Iterate over each account
+    Account.find_each do |account|
+      schema_name = account.tenant
+      tenant_name = account.name
+      tenant_id = account.id
+      next if schema_name.blank? # Skip if the tenant schema is blank
+
+      # Check the size of the schema
+      result = ActiveRecord::Base.connection.execute("
+        SELECT pg_size_pretty(sum(pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))) AS total_size
+        FROM pg_tables
+        WHERE schemaname = '#{schema_name}';
+      ")
+
+      # Get the size from the result
+      schema_size = result[0]['total_size']
+      puts "tenant_id: #{tenant_id}, tenant_name: #{tenant_name}, schema_name: #{schema_name}, schema_size: #{schema_size}"
+
+      # Define the output file path
+      dump_file = "/app/samvera/hyrax-webapp/tmp/#{schema_name}_dump.sql"
+
+      # Construct the pg_dump command
+      command = "pg_dump -U #{pg_user} -h #{pg_host} -p #{pg_port} -d #{db_name} --schema='#{schema_name}' -f #{dump_file}"
+
+      # Run the command
+      puts "Dumping schema #{schema_name} to #{dump_file}"
+      system(command)
+
+      # Check whether the command was successful
+      if $?.exitstatus == 0
+        puts "Successfully dumped schema #{schema_name}"
+      else
+        puts "Error dumping schema #{schema_name}"
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/ops/production-deploy.tmpl.yaml b/ops/production-deploy.tmpl.yaml
index e78ef555e..ece0e1986 100644
--- a/ops/production-deploy.tmpl.yaml
+++ b/ops/production-deploy.tmpl.yaml
@@ -68,7 +68,7 @@ extraEnvVars: &envVars
   - name: DB_ADAPTER
     value: postgresql
   - name: DB_HOST
-    value: postgres-cluster-alpha-ha.postgres.svc.cluster.local
+    value: acid-postgres-cluster-delta.postgres.svc.cluster.local
   - name: DB_NAME
     value: pals-production-hyku
   - name: DB_PASSWORD
@@ -398,7 +398,7 @@ fits:
   subPath: /fits

 externalPostgresql:
-  host: postgres-cluster-alpha-ha.postgres.svc.cluster.local
+  host: acid-postgres-cluster-delta.postgres.svc.cluster.local
   username: main
   password: $DB_PASSWORD
   database: pals-production-hyku
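
Usage sketch: a minimal example of invoking the two rake tasks added above, assuming they are run from a shell inside the Hyku app container with Bundler available; the DAYS_OLD and BATCH_SIZE values are illustrative, not taken from the patch.

    # Delete unsaved searches older than 30 days, 1000 rows per batch, across all tenants
    DAYS_OLD=30 BATCH_SIZE=1000 bundle exec rake search:delete_old_searches

    # Dump each tenant schema to tmp/<schema>_dump.sql using the Rails database config
    bundle exec rake db:dump_schemas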