Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP enabling transcription integration for giant #103

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions packages/backend-common/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { findParameter, getParameters } from './configHelpers';
import { Parameter, SSM } from '@aws-sdk/client-ssm';
import { defaultProvider } from '@aws-sdk/credential-provider-node';
import { logger } from '@guardian/transcription-service-backend-common';
import { DestinationService } from '@guardian/transcription-service-common';
export interface TranscriptionConfig {
auth: {
clientId: string;
Expand All @@ -16,9 +17,7 @@ export interface TranscriptionConfig {
emailNotificationFromAddress: string;
sourceMediaBucket: string;
transcriptionOutputBucket: string;
destinationQueueUrls: {
transcriptionService: string;
};
destinationQueueUrls: DestinationQueueUrls;
tableName: string;
};
aws: {
Expand All @@ -27,6 +26,11 @@ export interface TranscriptionConfig {
};
}

/**
 * Queue URL for each destination service: one string entry keyed by every
 * member of the `DestinationService` enum.
 */
type DestinationQueueUrls = Record<DestinationService, string>;

/**
 * Chooses the credential source for AWS SDK clients.
 *
 * @param onAws - whether the code is running on AWS.
 * @returns `undefined` when `onAws` is true (presumably so the SDK client
 * resolves credentials itself - confirm at the call sites); otherwise a
 * provider built from the local 'investigations' profile.
 */
const credentialProvider = (onAws: boolean) => {
  if (onAws) {
    return undefined;
  }
  return defaultProvider({ profile: 'investigations' });
};

Expand Down Expand Up @@ -76,11 +80,18 @@ export const getConfig = async (): Promise<TranscriptionConfig> => {
stage === 'DEV'
? undefined
: findParameter(parameters, paramPath, 'deadLetterQueueUrl');
const destinationTopic = findParameter(

const destinationQueue = findParameter(
parameters,
paramPath,
'destinationQueueUrls/transcriptionService',
);

const giantDestinationQueue = findParameter(
parameters,
paramPath,
'destinationQueueUrls/giant',
);
// AWS clients take an optional 'endpoint' property that is only needed by localstack - on code/prod you don't need
// to set it. Here we infer the endpoint (http://localhost:4566) from the sqs url
const localstackEndpoint =
Expand Down Expand Up @@ -132,7 +143,8 @@ export const getConfig = async (): Promise<TranscriptionConfig> => {
sourceMediaBucket,
emailNotificationFromAddress,
destinationQueueUrls: {
transcriptionService: destinationTopic,
[DestinationService.TranscriptionService]: destinationQueue,
[DestinationService.Giant]: giantDestinationQueue,
},
tableName,
transcriptionOutputBucket,
Expand Down
14 changes: 13 additions & 1 deletion packages/cdk/lib/transcription-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,15 @@ export class TranscriptionService extends GuStack {
},
);

const giantTranscriptionOutputQueueArn = new GuStringParameter(
this,
'GiantTranscriptionOutputQueueArn',
{
fromSSM: true,
default: `/${props.stage}/investigations/GiantTranscriptionOutputQueueArn`,
},
).valueAsString;

const ssmPrefix = `arn:aws:ssm:${props.env.region}:${this.account}:parameter`;
const ssmPath = `${this.stage}/${this.stack}/${APP_NAME}`;
const domainName =
Expand Down Expand Up @@ -281,7 +290,10 @@ export class TranscriptionService extends GuStack {
}),
new GuAllowPolicy(this, 'WriteToDestinationTopic', {
actions: ['sqs:SendMessage'],
resources: [transcriptionOutputQueue.queueArn],
resources: [
transcriptionOutputQueue.queueArn,
giantTranscriptionOutputQueueArn,
],
}),
new GuAllowPolicy(this, 'WriteToELK', {
actions: [
Expand Down
1 change: 1 addition & 0 deletions packages/common/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const zodLanguageCode = z.enum(getKeys(languageCodeToLanguage));

/**
 * Services that can receive the output of a transcription job.
 *
 * Each member's string value is also used as a key when looking up the
 * destination queue URL for a job, so every member needs a matching queue URL
 * in the application config.
 */
export enum DestinationService {
// The transcription service's own output queue.
TranscriptionService = 'TranscriptionService',
// Giant: a separate service that submits audio/video files for transcription
// and receives the resulting transcripts on its own output queue.
Giant = 'Giant',
}

const SignedUrl = z.object({
Expand Down
6 changes: 3 additions & 3 deletions packages/worker/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ const pollTranscriptionQueue = async (
}
await publishTranscriptionOutputFailure(
sqsClient,
config.app.destinationQueueUrls.transcriptionService,
config.app.destinationQueueUrls[job.transcriptDestinationService],
job,
);
return;
Expand Down Expand Up @@ -293,7 +293,7 @@ const pollTranscriptionQueue = async (

await publishTranscriptionOutput(
sqsClient,
config.app.destinationQueueUrls.transcriptionService,
config.app.destinationQueueUrls[job.transcriptDestinationService],
transcriptionOutput,
);

Expand Down Expand Up @@ -333,7 +333,7 @@ const pollTranscriptionQueue = async (
if (receiveCount >= MAX_RECEIVE_COUNT) {
await publishTranscriptionOutputFailure(
sqsClient,
config.app.destinationQueueUrls.transcriptionService,
config.app.destinationQueueUrls[job.transcriptDestinationService],
job,
);
}
Expand Down
7 changes: 7 additions & 0 deletions scripts/delete-local-queues.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/zsh

# Deletes the SQS queues used for local development from the localstack
# instance listening on localhost:4566.
LOCALSTACK_ENDPOINT="http://localhost:4566"
LOCALSTACK_ACCOUNT_ID="000000000000"

# One delete-queue call per queue, issued in the order listed here.
for queue_name in \
  transcription-service-task-queue-DEV.fifo \
  transcription-service-output-queue-DEV \
  giant-output-dead-letter-queue-DEV.fifo \
  transcription-service-task-dead-letter-queue-DEV.fifo \
  giant-output-queue-DEV.fifo
do
  aws sqs delete-queue \
    --queue-url "${LOCALSTACK_ENDPOINT}/${LOCALSTACK_ACCOUNT_ID}/${queue_name}" \
    --endpoint-url "${LOCALSTACK_ENDPOINT}"
done
3 changes: 0 additions & 3 deletions scripts/purge-local-queue.sh

This file was deleted.

7 changes: 7 additions & 0 deletions scripts/purge-local-queues.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/zsh

# Purges (empties without deleting) the SQS queues used for local development
# on the localstack instance listening on localhost:4566.
LOCALSTACK_ENDPOINT="http://localhost:4566"
LOCALSTACK_ACCOUNT_ID="000000000000"

# One purge-queue call per queue, issued in the order listed here.
for queue_name in \
  transcription-service-task-queue-DEV.fifo \
  transcription-service-output-queue-DEV \
  giant-output-dead-letter-queue-DEV.fifo \
  transcription-service-task-dead-letter-queue-DEV.fifo \
  giant-output-queue-DEV.fifo
do
  aws sqs purge-queue \
    --queue-url "${LOCALSTACK_ENDPOINT}/${LOCALSTACK_ACCOUNT_ID}/${queue_name}" \
    --endpoint-url "${LOCALSTACK_ENDPOINT}"
done
57 changes: 56 additions & 1 deletion scripts/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,73 @@ fi
docker-compose up -d
APP_NAME="transcription-service"
# If the queue already exists this command appears to still work and returns the existing queue url
QUEUE_URL=$(aws --endpoint-url=http://localhost:4566 sqs create-queue --queue-name=$APP_NAME-task-queue-DEV.fifo --attributes "FifoQueue=true,ContentBasedDeduplication=true" | jq .QueueUrl)

#########
##### task dead letter queue
#########
DEAD_LETTER_QUEUE_URL=$(aws --endpoint-url=http://localhost:4566 sqs create-queue --queue-name=$APP_NAME-task-dead-letter-queue-DEV.fifo --attributes "FifoQueue=true,ContentBasedDeduplication=true" | jq .QueueUrl)
# We don't install the localstack dns so need to replace the endpoint with localhost
DEAD_LETTER_QUEUE_URL_LOCALHOST=${DEAD_LETTER_QUEUE_URL/sqs.eu-west-1.localhost.localstack.cloud/localhost}

echo "Created queue in localstack, url: ${DEAD_LETTER_QUEUE_URL_LOCALHOST}"

#########
##### task queue
#########
QUEUE_URL=$(aws --endpoint-url=http://localhost:4566 sqs create-queue --queue-name=$APP_NAME-task-queue-DEV.fifo \
--attributes '{
"FifoQueue": "true",
"ContentBasedDeduplication": "true",
"RedrivePolicy": "{\"deadLetterTargetArn\":\"arn:aws:sqs:us-east-1:000000000000:transcription-service-task-dead-letter-queue-DEV.fifo\",\"maxReceiveCount\":\"3\"}"
}' | jq .QueueUrl)
# We don't install the localstack dns so need to replace the endpoint with localhost
QUEUE_URL_LOCALHOST=${QUEUE_URL/sqs.eu-west-1.localhost.localstack.cloud/localhost}

echo "Created queue in localstack, url: ${QUEUE_URL_LOCALHOST}"

#########
##### output queue
#########
OUTPUT_QUEUE_URL=$(aws --endpoint-url=http://localhost:4566 sqs create-queue --queue-name=$APP_NAME-output-queue-DEV | jq .QueueUrl)
# We don't install the localstack dns so need to replace the endpoint with localhost
OUTPUT_QUEUE_URL_LOCALHOST=${OUTPUT_QUEUE_URL/sqs.eu-west-1.localhost.localstack.cloud/localhost}

echo "Created queue in localstack, url: ${OUTPUT_QUEUE_URL_LOCALHOST}"

# ###########
# Creating output queue for Giant:
# Giant is a service that uses transcription service to transcribe its audio/video files.
# Giant pushes messages to the transcription input queue 'transcription-service-task-queue-DEV.fifo'
# and transcription worker pushes the resulting transcripts into the giant output queue 'giant-output-queue-DEV.fifo'.
# Since creating multiple localstack containers could add complication, and localstack is
# only needed for local running, the giant output queue is created in the transcription service localstack.
# ###########

#########
##### giant output dead letter queue
#########
GIANT_OUTPUT_DEAD_LETTER_QUEUE_URL=$(aws --endpoint-url=http://localhost:4566 sqs create-queue --queue-name=giant-output-dead-letter-queue-DEV.fifo --attributes "FifoQueue=true,ContentBasedDeduplication=true" | jq .QueueUrl)
# We don't install the localstack dns so need to replace the endpoint with localhost
GIANT_OUTPUT_DEAD_LETTER_QUEUE_URL_LOCALHOST=${GIANT_OUTPUT_DEAD_LETTER_QUEUE_URL/sqs.eu-west-1.localhost.localstack.cloud/localhost}

echo "Created queue in localstack, url: ${GIANT_OUTPUT_DEAD_LETTER_QUEUE_URL_LOCALHOST}"

#########
##### giant output queue
#########
GIANT_OUTPUT_QUEUE_URL=$(aws --endpoint-url=http://localhost:4566 sqs create-queue --queue-name=giant-output-queue-DEV.fifo \
--attributes '{
"FifoQueue": "true",
"ContentBasedDeduplication": "true",
"RedrivePolicy": "{\"deadLetterTargetArn\":\"arn:aws:sqs:us-east-1:000000000000:giant-output-dead-letter-queue-DEV.fifo\",\"maxReceiveCount\":\"3\"}"
}' | jq .QueueUrl)


# We don't install the localstack dns so need to replace the endpoint with localhost
GIANT_OUTPUT_QUEUE_URL_LOCALHOST=${GIANT_OUTPUT_QUEUE_URL/sqs.eu-west-1.localhost.localstack.cloud/localhost}

echo "Created queue in localstack, url: ${GIANT_OUTPUT_QUEUE_URL_LOCALHOST}"

DYNAMODB_ARN=$(aws --endpoint-url=http://localhost:4566 dynamodb create-table \
--table-name ${APP_NAME}-DEV \
--provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5 \
Expand Down
Loading