Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DRAFT: move-db-to-delta #1031

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions dump_schema.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Dump every non-system schema of a PostgreSQL database into its own
# custom-format archive, one "<schema>.dump" file per schema.
#
# Run this against the old database you are dumping from:
# open a shell in the database container, then execute this script.
#
# Exit status: 0 when every schema was dumped, 1 when the schema list could
# not be read, the dump directory could not be created, or any dump failed.

DB_NAME="pals-production-hyku" # Replace with your database name
DB_USER="postgres"             # Replace with your database username
DUMP_DIR="/tmp/postgres"       # Replace with your desired directory to store dump files

# Create dump directory if it doesn't exist
mkdir -p "$DUMP_DIR" || exit 1

# Get the list of schemas excluding system schemas.
# -A (unaligned) with -t (tuples only) prints one bare schema name per line.
# Every schema whose name starts with "pg_" is skipped, which covers
# pg_catalog and pg_toast as well as the per-session pg_temp_* and
# pg_toast_temp_* schemas.
SCHEMAS=$(psql -U "$DB_USER" -d "$DB_NAME" -A -t -c "SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT LIKE 'pg\_%' AND schema_name <> 'information_schema';") || {
  echo "Failed to list schemas in database: $DB_NAME" >&2
  exit 1
}

# Loop through each schema and dump it.
# Reading line by line (instead of word-splitting $SCHEMAS) keeps schema names
# that contain whitespace or glob characters intact.
printf '%s\n' "$SCHEMAS" | {
  FAILED=0

  while IFS= read -r SCHEMA; do
    # Skip blank lines in the psql output
    [ -n "$SCHEMA" ] || continue

    # Output message
    echo "Dumping schema: $SCHEMA"

    # Dump the schema.
    # -n takes a psql-style pattern; wrapping the name in double quotes makes
    # it match exactly (no case folding, no wildcard interpretation).
    # stdin is redirected so pg_dump cannot consume the schema list.
    if pg_dump -U "$DB_USER" -d "$DB_NAME" -n "\"$SCHEMA\"" -F c -f "$DUMP_DIR/${SCHEMA}.dump" </dev/null; then
      echo "Successfully dumped schema: $SCHEMA"
    else
      echo "Failed to dump schema: $SCHEMA" >&2
      FAILED=1
    fi
  done

  # Report overall failure to the caller when at least one dump failed
  exit "$FAILED"
}
40 changes: 40 additions & 0 deletions lib/tasks/delete_old_searches.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
namespace :search do
  # Deletes searches that have no owner (user_id IS NULL) and were created
  # more than DAYS_OLD days ago, tenant by tenant, in batches of BATCH_SIZE
  # rows, then runs VACUUM FULL on each tenant's searches table.
  #
  # Environment variables (both required, both must be positive integers):
  #   DAYS_OLD   - minimum age, in days, of the searches to delete
  #   BATCH_SIZE - maximum number of rows removed per DELETE statement
  #
  # Usage: rake search:delete_old_searches DAYS_OLD=30 BATCH_SIZE=1000
  desc 'Delete old unsaved searches for multiple tenants in batches'
  task delete_old_searches: :environment do
    days_old = ENV['DAYS_OLD'].to_i
    batch_size = ENV['BATCH_SIZE'].to_i

    # Validate the input before issuing any query.
    if days_old <= 0 || batch_size <= 0
      puts 'Valid DAYS_OLD and BATCH_SIZE are required.'
      exit 1
    end

    cutoff = Time.zone.today - days_old

    # find_each loads the accounts directly, so there is no second lookup
    # that could raise ActiveRecord::RecordNotFound for a vanished id.
    Account.find_each do |account|
      tenant_id = account.id

      # Switch tenant context
      switch!(account)

      # Delete in batches until a batch removes nothing.
      loop do
        # delete_all returns the number of rows it removed. Asking the
        # relation for its size afterwards would re-query and report the
        # size of the *next* batch instead.
        deleted = Search.where('created_at < ? AND user_id IS NULL', cutoff)
                        .limit(batch_size)
                        .delete_all

        break if deleted.zero?

        puts "Deleted #{deleted} searches in batch for tenant #{tenant_id}."
        sleep(1) # Optional: add a short delay to reduce load
      end

      # Perform VACUUM FULL on the searches table in the tenant's schema.
      # NOTE: per the PostgreSQL documentation VACUUM FULL rewrites the table
      # under an exclusive lock, so run this task during a quiet period.
      connection = ActiveRecord::Base.connection
      schema_name = account.tenant
      connection.execute("VACUUM FULL #{connection.quote_column_name(schema_name)}.searches;")
      puts "Performed VACUUM FULL on searches table for tenant #{tenant_id}."

      puts "Completed deletion of old searches older than #{days_old} days for tenant #{tenant_id}."
    end
  end
end
59 changes: 59 additions & 0 deletions lib/tasks/pg_dump_by_schema.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
namespace :db do
  # Reports the on-disk size of every tenant schema and dumps each one to
  # "<DUMP_DIR>/<schema>_dump.sql" with pg_dump.
  #
  # Connection settings come from the current Rails environment's database
  # configuration. The password is handed to pg_dump through its process
  # environment only; it is never printed and the task's own ENV is left alone.
  #
  # Environment variables:
  #   DUMP_DIR - directory for the dump files
  #              (default: /app/samvera/hyrax-webapp/tmp)
  #
  # Exits non-zero when at least one schema could not be dumped.
  desc 'Dump each tenant schema to a separate file in DUMP_DIR after reporting schema size'
  task dump_schemas: :environment do
    # Fetch database configuration from Rails environment
    db_config = Rails.configuration.database_configuration[Rails.env]

    pg_password = db_config['password']
    pg_user = db_config['username']
    pg_host = db_config['host']
    pg_port = (db_config['port'] || '5432').to_s
    db_name = db_config['database']
    dump_dir = ENV.fetch('DUMP_DIR', '/app/samvera/hyrax-webapp/tmp')

    # Environment passed to the pg_dump child process only
    pg_env = { 'PGPASSWORD' => pg_password&.to_s }

    # Print the values for debugging (the password itself is never shown)
    puts "PGPASSWORD: #{pg_password.present? ? '[FILTERED]' : '(not set)'}"
    puts "PGUSER: #{pg_user}"
    puts "PGHOST: #{pg_host}"
    puts "PGPORT: #{pg_port}"
    puts "DB_NAME: #{db_name}"

    connection = ActiveRecord::Base.connection
    failed_schemas = []

    # Iterate over each account
    Account.find_each do |account|
      schema_name = account.tenant
      tenant_name = account.name
      tenant_id = account.id
      next if schema_name.blank? # Skip if the tenant is blank

      # Check the size of the schema; the schema name is quoted as a SQL
      # literal instead of being interpolated raw into the statement.
      schema_size = connection.select_value(<<~SQL)
        SELECT pg_size_pretty(sum(pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))) AS total_size
        FROM pg_tables
        WHERE schemaname = #{connection.quote(schema_name)}
      SQL
      puts "tenant_id #{tenant_id}, tenant_name: #{tenant_name}, schema_name: #{schema_name} schema_size: #{schema_size}"

      # Define the output file path
      dump_file = File.join(dump_dir, "#{schema_name}_dump.sql")

      # Build the pg_dump command as an argument list so that no shell is
      # involved and no value can be reinterpreted as shell syntax.
      # --schema takes a psql-style pattern; double-quoting the name makes it
      # match exactly (no case folding, no wildcards).
      schema_pattern = %("#{schema_name.to_s.gsub('"', '""')}")
      command = ['pg_dump']
      command.push('-U', pg_user.to_s) if pg_user.present?
      command.push('-h', pg_host.to_s) if pg_host.present?
      command.push('-p', pg_port, '-d', db_name.to_s, "--schema=#{schema_pattern}", '-f', dump_file)

      # Run the command
      puts "Dumping schema #{schema_name} to #{dump_file}"

      # system returns true on exit status 0, false on a non-zero status and
      # nil when pg_dump could not be started at all.
      if system(pg_env, *command)
        puts "Successfully dumped schema #{schema_name}"
      else
        puts "Error dumping schema #{schema_name}"
        failed_schemas << schema_name
      end
    end

    # Make partial failure visible to whatever invoked the task
    abort "Failed to dump #{failed_schemas.size} schema(s): #{failed_schemas.join(', ')}" if failed_schemas.any?
  end
end
4 changes: 2 additions & 2 deletions ops/production-deploy.tmpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ extraEnvVars: &envVars
- name: DB_ADAPTER
value: postgresql
- name: DB_HOST
value: postgres-cluster-alpha-ha.postgres.svc.cluster.local
value: acid-postgres-cluster-delta.postgres.svc.cluster.local
- name: DB_NAME
value: pals-production-hyku
- name: DB_PASSWORD
Expand Down Expand Up @@ -398,7 +398,7 @@ fits:
subPath: /fits

externalPostgresql:
host: postgres-cluster-alpha-ha.postgres.svc.cluster.local
host: acid-postgres-cluster-delta.postgres.svc.cluster.local
username: main
password: $DB_PASSWORD
database: pals-production-hyku
Expand Down
Loading