diff --git a/ClassTranscribeDatabase/CaptionQueries.cs b/ClassTranscribeDatabase/CaptionQueries.cs index cf3e8a1..7d461a7 100644 --- a/ClassTranscribeDatabase/CaptionQueries.cs +++ b/ClassTranscribeDatabase/CaptionQueries.cs @@ -25,7 +25,7 @@ public async Task> GetCaptionsAsync(string videoId, string sourceI { try { - var transcriptionId = _context.Transcriptions.Where(t => t.Language == language && t.VideoId == videoId && t.SourceInternalRef== sourceInternalRef + var transcriptionId = _context.Transcriptions.Where(t => t.Language == language && t.VideoId == videoId && t.SourceInternalRef == sourceInternalRef && t.TranscriptionType == TranscriptionType.Caption).First().Id; return await GetCaptionsAsync(transcriptionId); } @@ -37,6 +37,26 @@ public async Task> GetCaptionsAsync(string videoId, string sourceI } } + /// + /// Get the text descriptions for a given videoId + /// + /// Language of the captions to fetch. + public async Task> GetDescriptionsAsync(string videoId, string language) // = "en-US" + { + try + { + var transcriptionId = _context.Transcriptions.Where(t => t.Language == language && t.VideoId == videoId + && t.TranscriptionType == TranscriptionType.TextDescription).First().Id; + return await GetCaptionsAsync(transcriptionId); + } + catch (System.InvalidOperationException) + { + // If Transcriptions do not exist then First() will throw InvalidOperationException + + return new List(); + } + } + /// /// Get the captions for a given transcriptionId /// diff --git a/ClassTranscribeServer/Controllers/EPubsController.cs b/ClassTranscribeServer/Controllers/EPubsController.cs index 87b6129..f86586c 100644 --- a/ClassTranscribeServer/Controllers/EPubsController.cs +++ b/ClassTranscribeServer/Controllers/EPubsController.cs @@ -4,6 +4,7 @@ using Microsoft.AspNetCore.Mvc; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Logging; +using Newtonsoft.Json; using Newtonsoft.Json.Linq; using System; using System.Collections.Generic; @@ -13,289 +14,312 @@ namespace ClassTranscribeServer.Controllers { - [Route("api/[controller]")] - [ApiController] - public class EPubsController : BaseController + [Route("api/[controller]")] + [ApiController] + public class EPubsController : BaseController + { + private readonly WakeDownloader _wakeDownloader; + private readonly CaptionQueries _captionQueries; + + public EPubsController(WakeDownloader wakeDownloader, + CTDbContext context, + CaptionQueries captionQueries, + ILogger logger) : base(context, logger) { - private readonly WakeDownloader _wakeDownloader; - private readonly CaptionQueries _captionQueries; + _captionQueries = captionQueries; + _wakeDownloader = wakeDownloader; + } + + public class EPubSceneData + { + public string Image { get; set; } + public string Text { get; set; } + public TimeSpan Start { get; set; } + public TimeSpan End { get; set; } + // public string OCRPhrases { get; set; } + // public string OCRText { get; set; } + public List OCRElements { get; set; } + public string Title { get; set; } + } - public EPubsController(WakeDownloader wakeDownloader, - CTDbContext context, - CaptionQueries captionQueries, - ILogger logger) : base(context, logger) + [NonAction] + public List GetSceneData(JArray scenes, List captions, List descriptions) + { + var sorted_descriptions = descriptions.OrderBy(c => c.Begin); + var chapters = new List(); + var nextStart = new TimeSpan(0); + + if (scenes == null) + { + return chapters; + } + foreach (Caption description in sorted_descriptions) + { + var endTime = description.End; + var subset = captions.Where(c => c.Begin < endTime && c.Begin >= nextStart).ToList(); + if (description == sorted_descriptions.Last()) { - _captionQueries = captionQueries; - _wakeDownloader = wakeDownloader; + subset = captions.Where(c => c.Begin >= nextStart).ToList(); } - - public class EPubSceneData + TimeSpan GetSceneTimestamp(JToken scene) { - public string Image { get; set; } - public string Text { get; set; } - public TimeSpan Start { get; set; } - public TimeSpan End { get; set; } - public string OCRPhrases { get; set; } - public string OCRText { get; set; } - public string Title { get; set; } + return TimeSpan.Parse(scene["start"].ToString()); } + JObject closestScene = (JObject)scenes.Where(s => GetSceneTimestamp(s) < description.End && + GetSceneTimestamp(s) > description.Begin). + FirstOrDefault(); - [NonAction] - public static List GetSceneData(JArray scenes, List captions) + if (closestScene == null) { - var chapters = new List(); - var nextStart = new TimeSpan(0); - - if (scenes == null) - { - return chapters; - } - - foreach (JObject scene in scenes) - { - var endTime = TimeSpan.Parse(scene["end"].ToString()); - var subset = captions.Where(c => c.Begin < endTime && c.Begin >= nextStart).ToList(); - - StringBuilder sb = new StringBuilder(); - subset.ForEach(c => sb.Append(c.Text + " ")); - - chapters.Add(new EPubSceneData - { - Image = scene["img_file"].ToString(), - Start = TimeSpan.Parse(scene["start"].ToString()), - End = TimeSpan.Parse(scene["end"].ToString()), - Text = sb.ToString(), - // Todo what if scene does not contain "title" ? - OCRText = scene["raw_text"]?.ToString(), - OCRPhrases = scene["phrases"]?.ToString(), - Title = scene["title"]?.ToString() - }); - - nextStart = endTime; - } - - return chapters; + closestScene = (JObject)scenes.OrderBy(s => Math.Abs((GetSceneTimestamp(s) - description.Begin).Ticks)).FirstOrDefault(); } - /// - /// Gets captions and images for a given video - /// - /// - [HttpGet("GetEpubData")] - [Authorize] - public async Task>> GetEpubData(string mediaId, string language) + StringBuilder sb = new StringBuilder(); + subset.ForEach(c => sb.Append(c.Text + " ")); + var img_descriptions = closestScene["phrases"]?.Select(p => p.ToString()).ToList(); + img_descriptions.Insert(0, description.Text); + chapters.Add(new EPubSceneData { - _logger.LogInformation($"GetEpubData({mediaId},{language}) starting"); - var media = _context.Medias.Find(mediaId); - Video video = await _context.Videos.FindAsync(media.VideoId); - _logger.LogInformation($"GetEpubData({mediaId},{language}) video found. SceneData:{video.SceneObjectDataId}."); - - if (!video.HasSceneObjectData()) { - _logger.LogInformation($"GetEpubData({mediaId}) - Early return - no SceneObjectData"); - return NotFound(); - } - TextData data = await _context.TextData.FindAsync(video.SceneObjectDataId); - _logger.LogInformation($"GetEpubData({mediaId},{language}) getting scenedata as JArray"); - JArray sceneArray = data.GetAsJSON()["Scenes"] as JArray; - - EPub epub = new EPub - { - Language = language, - SourceType = ResourceType.Media, - SourceId = mediaId - }; - const string SOURCEINTERNALREF= "ClassTranscribe/Azure"; // Do not change me; this is a key inside the database - // to indicate the source of the captions was this code - var captions = await _captionQueries.GetCaptionsAsync(media.VideoId, SOURCEINTERNALREF, epub.Language); - _logger.LogInformation($"GetEpubData({mediaId}) - returning combined SceneData"); - - return GetSceneData(sceneArray, captions); + Image = closestScene["img_file"].ToString(), + Start = nextStart, + End = endTime, + Text = sb.ToString(), + // OCRText = description.Text, + // OCRPhrases = closestScene["phrases"]?.ToString(), + // OCRPhrases = description.Text, + OCRElements = img_descriptions, + Title = closestScene["title"]?.ToString() + }); + + nextStart = endTime; + } + return chapters; + } - } + /// + /// Gets captions and images for a given video + /// + /// + [HttpGet("GetEpubData")] + [Authorize] + public async Task>> GetEpubData(string mediaId, string language) + { + var media = _context.Medias.Find(mediaId); + Video video = await _context.Videos.FindAsync(media.VideoId); + + if (!video.HasSceneObjectData()) + { + return NotFound(); + } + TextData data = await _context.TextData.FindAsync(video.SceneObjectDataId); + JArray sceneArray = data.GetAsJSON()["Scenes"] as JArray; + + EPub epub = new EPub + { + Language = language, + SourceType = ResourceType.Media, + SourceId = mediaId + }; + const string SOURCEINTERNALREF = "ClassTranscribe/Local"; // This is a key inside the database to + // indicate the source of the captions + var captions = await _captionQueries.GetCaptionsAsync(media.VideoId, SOURCEINTERNALREF, epub.Language); + if (captions.Count == 0) + { + const string LEGACYSOURCEINTERNALREF = "ClassTranscribe/Azure"; // We should only ask for captions from this + // source if the other has no entries + captions = await _captionQueries.GetCaptionsAsync(media.VideoId, LEGACYSOURCEINTERNALREF, epub.Language); + } + var descriptions = await _captionQueries.GetDescriptionsAsync(media.VideoId, epub.Language); + + var sd = GetSceneData(sceneArray, captions, descriptions); + return sd; - /// - /// Gets glossary for a given video - /// - /// - [HttpGet("GetGlossaryData")] - [Authorize] - public async Task> GetGlossaryData(string mediaId) - { - var media = _context.Medias.Find(mediaId); - Video video = await _context.Videos.FindAsync(media.VideoId); - if (video.HasGlossaryData()) { - TextData data = await _context.TextData.FindAsync(video.GlossaryDataId); - return data.GetAsJSON(); - } - return video.Glossary; + } - } + /// + /// Gets glossary for a given video + /// + /// + [HttpGet("GetGlossaryData")] + [Authorize] + public async Task> GetGlossaryData(string mediaId) + { + var media = _context.Medias.Find(mediaId); + Video video = await _context.Videos.FindAsync(media.VideoId); + if (video.HasGlossaryData()) + { + TextData data = await _context.TextData.FindAsync(video.GlossaryDataId); + return data.GetAsJSON(); + } + return video.Glossary; + + } + + [HttpGet("RequestEpubCreation")] + [Authorize] + public ActionResult RequestEpubCreation(string mediaId) + { + _wakeDownloader.GenerateScenes(mediaId); + return Ok(); + } + + // GET: api/EPubs/5 + [HttpGet("{id}")] + [Authorize] + public async Task> GetEPub(string id) + { + var ePub = await _context.EPubs.FindAsync(id); + + if (ePub == null) + { + return NotFound(); + } - [HttpGet("RequestEpubCreation")] - [Authorize] - public ActionResult RequestEpubCreation(string mediaId) + return ePub; + } + + // GET: api/EPubs/ByOwner/{userid} + [HttpGet("ByOwner/{UserId}")] + [Authorize] + public async Task>> GetEPubs(string userId = "") + { + try + { + + var ePubs = await _context.EPubs.ToListAsync(); + + if (!ePubs.Any()) { - _wakeDownloader.GenerateScenes(mediaId); - return Ok(); + return NotFound(); } - // GET: api/EPubs/5 - [HttpGet("{id}")] - [Authorize] - public async Task> GetEPub(string id) + ePubs.ForEach(ePub => { - var ePub = await _context.EPubs.FindAsync(id); + ePub.Chapters = null; + }); + + return ePubs; + } + catch (ArgumentException) + { + return BadRequest($"Invalid request to /api/EPubs/ByOwner/{userId}"); + } + } - if (ePub == null) - { - return NotFound(); - } + // GET: api/EPubs/BySource/{sourceType}/{sourceId} + [HttpGet("BySource/{sourceType}/{sourceId}")] + [Authorize] + public async Task>> GetEPubsBySource(string sourceType, string sourceId) + { + try + { + ResourceType type = (ResourceType)Enum.Parse(typeof(ResourceType), sourceType); - return ePub; - } + var ePubs = await _context.EPubs.Where(i => i.SourceType == type && i.SourceId == sourceId).ToListAsync(); - // GET: api/EPubs/ByOwner/{userid} - [HttpGet("ByOwner/{UserId}")] - [Authorize] - public async Task>> GetEPubs(string userId = "") + if (!ePubs.Any()) { - try - { - - var ePubs = await _context.EPubs.ToListAsync(); - - if (!ePubs.Any()) - { - return NotFound(); - } - - ePubs.ForEach(ePub => - { - ePub.Chapters = null; - }); - - return ePubs; - } - catch (ArgumentException) - { - return BadRequest($"Invalid request to /api/EPubs/ByOwner/{userId}"); - } + return NotFound(); } - // GET: api/EPubs/BySource/{sourceType}/{sourceId} - [HttpGet("BySource/{sourceType}/{sourceId}")] - [Authorize] - public async Task>> GetEPubsBySource(string sourceType, string sourceId) + ePubs.ForEach(ePub => { - try - { - ResourceType type = (ResourceType)Enum.Parse(typeof(ResourceType), sourceType); - - var ePubs = await _context.EPubs.Where(i => i.SourceType == type && i.SourceId == sourceId).ToListAsync(); - - if (!ePubs.Any()) - { - return NotFound(); - } - - ePubs.ForEach(ePub => - { - ePub.Chapters = null; - }); - - return ePubs; - } - catch (ArgumentException) - { - return BadRequest($"{sourceType} is not a valid resource type"); - } - } + ePub.Chapters = null; + }); + + return ePubs; + } + catch (ArgumentException) + { + return BadRequest($"{sourceType} is not a valid resource type"); + } + } - // PUT: api/EPubs/5 - [HttpPut("{id}")] - [DisableRequestSizeLimit] - [Authorize(Roles = Globals.ROLE_ADMIN + "," + Globals.ROLE_TEACHING_ASSISTANT + "," + Globals.ROLE_INSTRUCTOR)] - public async Task PutEPub(string id, EPub ePub) + // PUT: api/EPubs/5 + [HttpPut("{id}")] + [DisableRequestSizeLimit] + [Authorize(Roles = Globals.ROLE_ADMIN + "," + Globals.ROLE_TEACHING_ASSISTANT + "," + Globals.ROLE_INSTRUCTOR)] + public async Task PutEPub(string id, EPub ePub) + { + if (ePub == null || id != ePub.Id) + { + return BadRequest(); + } + + if (string.IsNullOrEmpty(ePub.Title) || + string.IsNullOrEmpty(ePub.Filename) || + string.IsNullOrEmpty(ePub.Language) || + string.IsNullOrEmpty(ePub.Author) || + string.IsNullOrEmpty(ePub.Publisher) || + string.IsNullOrEmpty(ePub.SourceId)) + { + return BadRequest("The following fields may not be empty: title, filename, language, author, publisher, sourceId"); + } + + _context.Entry(ePub).State = EntityState.Modified; + + try + { + await _context.SaveChangesAsync(); + } + catch (DbUpdateConcurrencyException) + { + if (!_context.EPubs.Any(e => e.Id == id)) { - if (ePub == null || id != ePub.Id) - { - return BadRequest(); - } - - if (string.IsNullOrEmpty(ePub.Title) || - string.IsNullOrEmpty(ePub.Filename) || - string.IsNullOrEmpty(ePub.Language) || - string.IsNullOrEmpty(ePub.Author) || - string.IsNullOrEmpty(ePub.Publisher) || - string.IsNullOrEmpty(ePub.SourceId)) - { - return BadRequest("The following fields may not be empty: title, filename, language, author, publisher, sourceId"); - } - - _context.Entry(ePub).State = EntityState.Modified; - - try - { - await _context.SaveChangesAsync(); - } - catch (DbUpdateConcurrencyException) - { - if (!_context.EPubs.Any(e => e.Id == id)) - { - return NotFound(); - } - else - { - throw; - } - } - - return NoContent(); + return NotFound(); } - - // POST: api/EPubs - [HttpPost] - [DisableRequestSizeLimit] - [Authorize(Roles = Globals.ROLE_ADMIN + "," + Globals.ROLE_TEACHING_ASSISTANT + "," + Globals.ROLE_INSTRUCTOR)] - public async Task> PostEPub(EPub ePub) + else { - if (ePub == null) - { - return BadRequest(); - } - - if (string.IsNullOrEmpty(ePub.Title) || - string.IsNullOrEmpty(ePub.Filename) || - string.IsNullOrEmpty(ePub.Language) || - string.IsNullOrEmpty(ePub.Author) || - string.IsNullOrEmpty(ePub.Publisher) || - string.IsNullOrEmpty(ePub.SourceId)) - { - return BadRequest("The following fields may not be empty: title, filename, language, author, publisher, sourceId"); - } - - _context.EPubs.Add(ePub); - await _context.SaveChangesAsync(); - - return CreatedAtAction("GetEPub", new { id = ePub.Id }, ePub); + throw; } + } - // DELETE: api/EPubs/5 - [HttpDelete("{id}")] - [Authorize(Roles = Globals.ROLE_ADMIN + "," + Globals.ROLE_TEACHING_ASSISTANT + "," + Globals.ROLE_INSTRUCTOR)] - public async Task> DeleteEPub(string id) - { - var ePub = await _context.EPubs.FindAsync(id); + return NoContent(); + } + + // POST: api/EPubs + [HttpPost] + [DisableRequestSizeLimit] + [Authorize(Roles = Globals.ROLE_ADMIN + "," + Globals.ROLE_TEACHING_ASSISTANT + "," + Globals.ROLE_INSTRUCTOR)] + public async Task> PostEPub(EPub ePub) + { + if (ePub == null) + { + return BadRequest(); + } + + if (string.IsNullOrEmpty(ePub.Title) || + string.IsNullOrEmpty(ePub.Filename) || + string.IsNullOrEmpty(ePub.Language) || + string.IsNullOrEmpty(ePub.Author) || + string.IsNullOrEmpty(ePub.Publisher) || + string.IsNullOrEmpty(ePub.SourceId)) + { + return BadRequest("The following fields may not be empty: title, filename, language, author, publisher, sourceId"); + } + + _context.EPubs.Add(ePub); + await _context.SaveChangesAsync(); + + return CreatedAtAction("GetEPub", new { id = ePub.Id }, ePub); + } - if (ePub == null) - { - return NotFound(); - } + // DELETE: api/EPubs/5 + [HttpDelete("{id}")] + [Authorize(Roles = Globals.ROLE_ADMIN + "," + Globals.ROLE_TEACHING_ASSISTANT + "," + Globals.ROLE_INSTRUCTOR)] + public async Task> DeleteEPub(string id) + { + var ePub = await _context.EPubs.FindAsync(id); - _context.EPubs.Remove(ePub); - await _context.SaveChangesAsync(); + if (ePub == null) + { + return NotFound(); + } - return ePub; - } + _context.EPubs.Remove(ePub); + await _context.SaveChangesAsync(); + + return ePub; } + } } \ No newline at end of file