Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 245 additions & 0 deletions scripts/firebase-admin/migrateHearingTranscription.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
import * as admin from "firebase-admin"
import { Number, Record, String } from "runtypes"
import { Script } from "./types"
import { Timestamp } from "../../functions/src/firebase"

/**
 * Loads the dev service-account credentials from disk.
 *
 * The file is loaded with Node's `require`, so a relative `path` is resolved
 * by the module loader rather than against the current working directory.
 *
 * @param path Location of the service-account JSON file.
 * @returns Whatever the module loader produces for that file.
 */
function getDevServiceAccount(path: string) {
  const serviceAccount = require(path)
  return serviceAccount
}

/**
 * Initializes the source (dev) Firebase app as a secondary, named app so that
 * it can coexist with the app the script runner already set up.
 *
 * @param devServiceAccountPath Path to the dev service-account JSON file.
 * @returns The Firebase app registered under the name "dev".
 */
function initDevApp(devServiceAccountPath: string) {
  const appName = "dev"
  const credential = admin.credential.cert(
    getDevServiceAccount(devServiceAccountPath)
  )
  return admin.initializeApp({ credential }, appName)
}

/**
 * Recursively walks a value read from Firestore and replaces every
 * timestamp-shaped object with a `Timestamp` instance.
 *
 * A value counts as timestamp-shaped when it is an object whose `_seconds` and
 * `_nanoseconds` properties are both numbers. Values that are already
 * `instanceof Timestamp` are returned untouched.
 *
 * The timestamp is rebuilt from the exact `(seconds, nanoseconds)` pair.
 * Converting through a floating-point millisecond value first would lose
 * sub-microsecond precision for present-day epochs.
 *
 * NOTE(review): every other object is rebuilt as a plain object from its own
 * enumerable entries, so class instances without the timestamp shape lose
 * their prototype. Confirm the migrated documents contain no such values.
 *
 * @param obj Any value: primitive, array, plain object, or timestamp.
 * @returns A copy of `obj` with timestamps normalized; primitives and existing
 *   `Timestamp` instances are returned as-is.
 */
function convertTimestamps(obj: any): any {
  if (obj instanceof Timestamp) {
    return obj
  }
  if (Array.isArray(obj)) {
    return obj.map(item => convertTimestamps(item))
  }
  if (obj && typeof obj === "object") {
    if (
      typeof obj._seconds === "number" &&
      typeof obj._nanoseconds === "number"
    ) {
      // Timestamp-shaped but not an instance of our Timestamp: rebuild exactly.
      return new Timestamp(obj._seconds, obj._nanoseconds)
    }
    return Object.fromEntries(
      Object.entries(obj).map(([key, value]) => [key, convertTimestamps(value)])
    )
  }
  return obj
}

// Runtime schema for the script's command-line arguments.
const Args = Record({
// Despite its name, this value is passed to initDevApp as the path to the dev
// service-account JSON file.
sourceProject: String,
// Optional hearing number; when given, only the event "hearing-<number>" is
// processed instead of every hearing event.
hearing: Number.optional()
})

/**
 * Copies hearing video transcriptions from the dev Firebase project into the
 * target project reached through `db`.
 *
 * Arguments (validated by `Args`):
 * - `sourceProject`: path to the dev service-account JSON file.
 * - `hearing` (optional): migrate only the event `hearing-<hearing>`; when
 *   omitted, every dev event of type "hearing" is considered, up to a limit of
 *   100 migrated hearings per run.
 *
 * A hearing is migrated only when the dev event has a `videoTranscriptionId`,
 * the event exists in the target project, and the target event has no
 * `videoTranscriptionId` yet. The transcription document and its
 * subcollections are copied first; the target event's video fields are
 * updated only after that copy succeeded, so an event never points at a
 * transcription that was not written.
 */
export const script: Script = async ({ db, args }) => {
  const { sourceProject, hearing } = Args.check(args)
  if (!sourceProject) {
    console.error(
      "Please provide the path to the dev service account JSON file as an argument."
    )
    process.exit(1)
  }

  // Clear emulator environment variables to avoid connecting to emulators when creating secondary instance
  delete process.env.FIRESTORE_EMULATOR_HOST
  delete process.env.FIREBASE_AUTH_EMULATOR_HOST
  delete process.env.FIREBASE_STORAGE_EMULATOR_HOST

  // Initialize dev app and db (digital-testimony-dev)
  const devApp = initDevApp(sourceProject)
  const devDb = devApp.firestore()

  // Builds the video-related fields written onto the target event document.
  const videoFields = (devData: admin.firestore.DocumentData) => ({
    videoURL: devData.videoURL,
    videoFetchedAt: convertTimestamps(devData.videoFetchedAt),
    videoTranscriptionId: devData.videoTranscriptionId
  })

  // Copies one transcription document plus all of its subcollections from dev
  // to the target project. Returns false (after logging) when the dev
  // transcription is missing or the target document could not be created.
  const copyTranscription = async (
    transcriptionId: string
  ): Promise<boolean> => {
    const devTranscriptionDoc = await devDb
      .collection("transcriptions")
      .doc(transcriptionId)
      .get()
    const devTranscriptionData = devTranscriptionDoc.data()

    if (!devTranscriptionData) {
      console.error(`Transcription ${transcriptionId} not found in dev project.`)
      return false
    }

    const targetRef = db.collection("transcriptions").doc(transcriptionId)

    // Use create() rather than set() so an existing transcription is never
    // overwritten. The write is awaited directly so that the failure is
    // actually observed here before anything else is written.
    try {
      console.log(`Creating transcription ${transcriptionId}...`)
      await targetRef.create(convertTimestamps(devTranscriptionData))
    } catch (err) {
      console.error(`Error creating transcription ${transcriptionId}:`, err)
      return false
    }

    const subcollections = await devTranscriptionDoc.ref.listCollections()
    for (const subcol of subcollections) {
      const docs = await subcol.get()
      for (const doc of docs.docs) {
        // Normalize timestamps the same way as for the parent document.
        await targetRef
          .collection(subcol.id)
          .doc(doc.id)
          .set(convertTimestamps(doc.data()))
      }
    }
    return true
  }

  // Migrates exactly one hearing, logging the reason whenever nothing is done.
  const migrateSingleHearing = async (hearingId: string): Promise<void> => {
    console.log(`Processing single hearing: ${hearingId}`)
    const devHearingDoc = await devDb.collection("events").doc(hearingId).get()

    if (!devHearingDoc.exists) {
      console.error(`Hearing ${hearingId} not found in dev project.`)
      return
    }
    const devData = devHearingDoc.data()

    if (!devData?.videoTranscriptionId) {
      console.log(`Hearing ${hearingId} has no transcription to migrate.`)
      return
    }

    const targetRef = db.collection("events").doc(hearingId)
    const targetData = (await targetRef.get()).data()

    // update() fails on a missing document, so bail out before copying
    // anything into the target project.
    if (!targetData) {
      console.error(`Hearing ${hearingId} not found in target project.`)
      return
    }

    // Only migrate if hearing in target environment does not have a transcription yet
    if (targetData.videoTranscriptionId) {
      console.log(`Hearing ${hearingId} already has a transcription, skipping.`)
      return
    }

    if (!(await copyTranscription(devData.videoTranscriptionId))) {
      return
    }

    await targetRef.update(videoFields(devData))
    console.log(`Migration complete for hearing ${hearingId}.`)
  }

  // Migrates every eligible hearing, stopping after `limit` successful
  // migrations, and prints a summary of the counters at the end.
  const migrateAllHearings = async (): Promise<void> => {
    const devHearingsSnapshot = await devDb
      .collection("events")
      .where("type", "==", "hearing")
      .get()

    const limit = 100
    let migrated = 0
    let skipped = 0
    let failed = 0

    for (const devDoc of devHearingsSnapshot.docs) {
      if (migrated >= limit) {
        console.log(`Migration limit of ${limit} reached. Stopping.`)
        break
      }

      const devData = devDoc.data()
      if (!devData.videoTranscriptionId) {
        skipped++
        console.log(`${devDoc.id} has no transcription to migrate.`)
        continue
      }

      const targetRef = db.collection("events").doc(devDoc.id)
      const targetData = (await targetRef.get()).data()

      if (!targetData) {
        skipped++
        console.log(`${devDoc.id} not found in target project.`)
        continue
      }

      // Only migrate if hearing in target environment does not have a transcription yet
      if (targetData.videoTranscriptionId) {
        console.log(`${devDoc.id} already has a transcription, skipping.`)
        skipped++
        continue
      }

      console.log(`Migrating ${devDoc.id}...`)
      if (!(await copyTranscription(devData.videoTranscriptionId))) {
        failed++
        continue
      }

      // Awaited directly so a failed update is counted instead of surfacing
      // later as an unhandled rejection.
      console.log(`Updating ${devDoc.id}...`)
      try {
        await targetRef.update(videoFields(devData))
        migrated++
      } catch (err) {
        failed++
        console.error(`Error updating ${devDoc.id}:`, err)
      }
    }

    console.log(
      `Migration complete. Migrated: ${migrated}, Skipped: ${skipped}, Failed: ${failed}`
    )
  }

  try {
    // Compare against undefined so that hearing number 0 still selects the
    // single-hearing path instead of triggering a full migration.
    if (hearing !== undefined) {
      await migrateSingleHearing("hearing-" + hearing)
    } else {
      await migrateAllHearings()
    }
  } finally {
    // Release the secondary app's resources and free the "dev" app name.
    await devApp.delete()
  }
}