Skip to content

Commit 7d2ea04

Browse files
authored
Add support for text-to-image (#6648)
* Add ITextToImageClient * Remove URI based edit since it's not available * Add filename for edit * Add OpenAI implementation of ITextToImageClient * Fix tests * Add tests for TextToImage * Add DelegatingTextToImageClient and tests * Add integration test and fix some bugs * Add remaining support to MEAI for TextToImage * Make all TextToImageOptions optional These are all nullable now so that the client can use defaults where appropriate. Remove quality default since it's not consistent across models. Also remove setting ResponseFormat since this is not supported by gpt-image-1. * Address feedback * Document some exceptions * Address feedback * Make EditImageAsync plural OpenAI's image API supports multiple images and this does seem to be common functionality and a better generalization. The client library doesn't expose this yet, but we should account for it. Image models may be capable of things like "Combine the subjects of these images into a single image" or "Create a single image that uses the subject from the first image and background for the second" etc. * Address feedback and add/fix tests. * Fix bad merge * Address feedback * Fix test * Use DataContent.Name for filename. * Add extensions for EditImageAsync Extension that accepts a single DataContent and one that accepts a byte[]. I've left out streams and file paths, since these require more opinions about how to load them. I filed #6683 to address streams. * Fix test * Remove use of `_model` field. 
* Rename ImageToText to Image * Rename TextToImage directories to Image * Rename files TextToImage -> Image * Add new request and response type * Make GenerateImagesAsync accept ImageRequest * Remove EditImageAsync * Adding GenerateStreamingImagesAsync * Update docs * Rename ImageClient ImageGenerator * Fix up some text-to-image references * Rename Image(Options|Request|Response) * Remove `Images` from `GenerateImagesAsync` * Remove streaming method We don't yet have any good public support for streaming to vet this API We can guess at how it might behave for OpenAI, but that doesn't really give enough confidence to build the API around it. * Address feedback * Provide OpenAI an appropriate filename * Remove Style from ImageGenerationOptions
1 parent 8513ff7 commit 7d2ea04

35 files changed

+2734
-0
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Diagnostics.CodeAnalysis;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
using Microsoft.Shared.Diagnostics;
9+
10+
namespace Microsoft.Extensions.AI;
11+
12+
/// <summary>
13+
/// Provides an optional base class for an <see cref="IImageGenerator"/> that passes through calls to another instance.
14+
/// </summary>
15+
/// <remarks>
16+
/// This is recommended as a base type when building generators that can be chained in any order around an underlying <see cref="IImageGenerator"/>.
17+
/// The default implementation simply passes each call to the inner generator instance.
/// Disposing an instance of this type also disposes the inner generator it wraps.
18+
/// </remarks>
19+
[Experimental("MEAI001")]
20+
public class DelegatingImageGenerator : IImageGenerator
21+
{
22+
/// <summary>
23+
/// Initializes a new instance of the <see cref="DelegatingImageGenerator"/> class.
24+
/// </summary>
25+
/// <param name="innerGenerator">The wrapped generator instance to which calls are forwarded.</param>
26+
/// <exception cref="ArgumentNullException"><paramref name="innerGenerator"/> is <see langword="null"/>.</exception>
27+
protected DelegatingImageGenerator(IImageGenerator innerGenerator)
28+
{
29+
InnerGenerator = Throw.IfNull(innerGenerator);
30+
}
31+
32+
/// <inheritdoc />
/// <remarks>
/// Invokes <see cref="Dispose(bool)"/> with <see langword="true"/> and then suppresses finalization of this instance.
/// </remarks>
33+
public void Dispose()
34+
{
35+
Dispose(disposing: true);
36+
GC.SuppressFinalize(this);
37+
}
38+
39+
/// <summary>Gets the inner <see cref="IImageGenerator" /> that was supplied to the constructor.</summary>
40+
protected IImageGenerator InnerGenerator { get; }
41+
42+
/// <inheritdoc />
/// <remarks>The default implementation forwards all arguments unchanged to <see cref="InnerGenerator"/>.</remarks>
43+
public virtual Task<ImageGenerationResponse> GenerateAsync(
44+
ImageGenerationRequest request, ImageGenerationOptions? options = null, CancellationToken cancellationToken = default)
45+
{
46+
return InnerGenerator.GenerateAsync(request, options, cancellationToken);
47+
}
48+
49+
/// <inheritdoc />
/// <remarks>
/// When <paramref name="serviceKey"/> is <see langword="null"/> and this instance is of the requested
/// <paramref name="serviceType"/>, this instance is returned; in every other case the request is forwarded
/// to <see cref="InnerGenerator"/>.
/// </remarks>
50+
public virtual object? GetService(Type serviceType, object? serviceKey = null)
51+
{
52+
_ = Throw.IfNull(serviceType);
53+
54+
// A non-null key has no meaning at this layer, so keyed requests are always passed through to the inner generator.
55+
return
56+
serviceKey is null && serviceType.IsInstanceOfType(this) ? this :
57+
InnerGenerator.GetService(serviceType, serviceKey);
58+
}
59+
60+
/// <summary>Releases the resources held by this instance.</summary>
61+
/// <param name="disposing"><see langword="true"/> if being called from <see cref="Dispose()"/>; otherwise, <see langword="false"/>.</param>
/// <remarks>
/// When <paramref name="disposing"/> is <see langword="true"/>, the inner generator is disposed;
/// otherwise this base implementation does nothing.
/// </remarks>
62+
protected virtual void Dispose(bool disposing)
63+
{
64+
if (disposing)
65+
{
66+
InnerGenerator.Dispose();
67+
}
68+
}
69+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Diagnostics.CodeAnalysis;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
9+
namespace Microsoft.Extensions.AI;
10+
11+
/// <summary>
12+
/// Represents a generator of images.
13+
/// </summary>
14+
[Experimental("MEAI001")]
15+
public interface IImageGenerator : IDisposable
16+
{
17+
/// <summary>
18+
/// Sends an image generation request and returns the result as an <see cref="ImageGenerationResponse"/>.
19+
/// </summary>
20+
/// <param name="request">The image generation request containing the prompt and optional original images for editing.</param>
21+
/// <param name="options">The image generation options to configure the request.</param>
22+
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
23+
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <see langword="null"/>.</exception>
24+
/// <returns>A task whose result is the <see cref="ImageGenerationResponse"/> produced for the supplied <see cref="ImageGenerationRequest"/>.</returns>
25+
Task<ImageGenerationResponse> GenerateAsync(ImageGenerationRequest request, ImageGenerationOptions? options = null, CancellationToken cancellationToken = default);
26+
27+
/// <summary>Asks the <see cref="IImageGenerator"/> for an object of the specified type <paramref name="serviceType"/>.</summary>
28+
/// <param name="serviceType">The type of object being requested.</param>
29+
/// <param name="serviceKey">An optional key that can be used to help identify the target service.</param>
30+
/// <returns>The found object; otherwise, <see langword="null"/>.</returns>
31+
/// <exception cref="ArgumentNullException"><paramref name="serviceType"/> is <see langword="null"/>.</exception>
32+
/// <remarks>
33+
/// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the <see cref="IImageGenerator"/>,
34+
/// including itself or any services it might be wrapping.
35+
/// </remarks>
36+
object? GetService(Type serviceType, object? serviceKey = null);
37+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Diagnostics.CodeAnalysis;
6+
using System.Drawing;
7+
using System.Text.Json.Serialization;
8+
9+
namespace Microsoft.Extensions.AI;
10+
11+
/// <summary>Represents the options for an image generation request.</summary>
/// <remarks>Every option is nullable; a <see langword="null"/> value means the option has not been specified.</remarks>
12+
[Experimental("MEAI001")]
13+
public class ImageGenerationOptions
14+
{
15+
/// <summary>
16+
/// Gets or sets the number of images to generate.
17+
/// </summary>
18+
public int? Count { get; set; }
19+
20+
/// <summary>
21+
/// Gets or sets the size of the generated image.
22+
/// </summary>
23+
/// <remarks>
24+
/// If a provider only supports fixed sizes, the closest supported size will be used.
25+
/// </remarks>
26+
public Size? ImageSize { get; set; }
27+
28+
/// <summary>
29+
/// Gets or sets the media type (also known as MIME type) of the generated image.
30+
/// </summary>
31+
public string? MediaType { get; set; }
32+
33+
/// <summary>
34+
/// Gets or sets the model ID to use for image generation.
35+
/// </summary>
36+
public string? ModelId { get; set; }
37+
38+
/// <summary>
39+
/// Gets or sets a callback responsible for creating the raw representation of the image generation options from an underlying implementation.
40+
/// </summary>
41+
/// <remarks>
42+
/// The underlying <see cref="IImageGenerator" /> implementation may have its own representation of options.
43+
/// When <see cref="IImageGenerator.GenerateAsync" /> is invoked with an <see cref="ImageGenerationOptions" />,
44+
/// that implementation may convert the provided options into its own representation in order to use it while performing
45+
/// the operation. For situations where a consumer knows which concrete <see cref="IImageGenerator" /> is being used
46+
/// and how it represents options, a new instance of that implementation-specific options type may be returned by this
47+
/// callback, for the <see cref="IImageGenerator" /> implementation to use instead of creating a new instance.
48+
/// Such implementations may mutate the supplied options instance further based on other settings supplied on this
49+
/// <see cref="ImageGenerationOptions" /> instance or from other inputs; therefore, it is <b>strongly recommended</b> to not
50+
/// return shared instances and instead make the callback return a new instance on each call.
51+
/// This is typically used to set an implementation-specific setting that isn't otherwise exposed from the strongly-typed
52+
/// properties on <see cref="ImageGenerationOptions" />.
53+
/// </remarks>
54+
[JsonIgnore]
55+
public Func<IImageGenerator, object?>? RawRepresentationFactory { get; set; }
56+
57+
/// <summary>
58+
/// Gets or sets the response format of the generated image.
59+
/// </summary>
60+
public ImageGenerationResponseFormat? ResponseFormat { get; set; }
61+
62+
/// <summary>Produces a clone of the current <see cref="ImageGenerationOptions"/> instance.</summary>
63+
/// <returns>A clone of the current <see cref="ImageGenerationOptions"/> instance.</returns>
/// <remarks>
/// The clone is a new <see cref="ImageGenerationOptions"/> whose properties are assigned from this instance.
/// The copy is shallow: the <see cref="RawRepresentationFactory"/> delegate reference is shared with the original.
/// This base implementation always creates an <see cref="ImageGenerationOptions"/>, so a derived type that adds
/// state needs to override this method for that state to be carried over.
/// </remarks>
64+
public virtual ImageGenerationOptions Clone()
65+
{
66+
ImageGenerationOptions options = new()
67+
{
68+
Count = Count,
69+
MediaType = MediaType,
70+
ImageSize = ImageSize,
71+
ModelId = ModelId,
72+
RawRepresentationFactory = RawRepresentationFactory,
73+
ResponseFormat = ResponseFormat
74+
};
75+
76+
return options;
77+
}
78+
}
79+
80+
/// <summary>
81+
/// Represents the requested response format of the generated image.
82+
/// </summary>
83+
/// <remarks>
84+
/// Not all implementations support all response formats, and this value may be ignored by the implementation if not supported.
/// Members are declared without explicit values, so they take the default sequential values in declaration order.
85+
/// </remarks>
86+
[Experimental("MEAI001")]
87+
public enum ImageGenerationResponseFormat
88+
{
89+
/// <summary>
90+
/// The generated image is returned as a URI pointing to the image resource.
91+
/// </summary>
92+
Uri,
93+
94+
/// <summary>
95+
/// The generated image is returned as in-memory image data.
96+
/// </summary>
97+
Data,
98+
99+
/// <summary>
100+
/// The generated image is returned as a hosted resource identifier, which can be used to retrieve the image later.
101+
/// </summary>
102+
Hosted,
103+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Diagnostics.CodeAnalysis;
6+
7+
namespace Microsoft.Extensions.AI;
8+
9+
/// <summary>Represents a request for image generation.</summary>
10+
[Experimental("MEAI001")]
11+
public class ImageGenerationRequest
12+
{
13+
/// <summary>Initializes a new instance of the <see cref="ImageGenerationRequest"/> class.</summary>
/// <remarks>Both <see cref="Prompt"/> and <see cref="OriginalImages"/> are left <see langword="null"/>.</remarks>
14+
public ImageGenerationRequest()
15+
{
16+
}
17+
18+
/// <summary>Initializes a new instance of the <see cref="ImageGenerationRequest"/> class with the specified prompt.</summary>
19+
/// <param name="prompt">The prompt to guide the image generation.</param>
/// <remarks><see cref="OriginalImages"/> is left <see langword="null"/>.</remarks>
20+
public ImageGenerationRequest(string prompt)
21+
{
22+
Prompt = prompt;
23+
}
24+
25+
/// <summary>Initializes a new instance of the <see cref="ImageGenerationRequest"/> class with the specified prompt and original images.</summary>
26+
/// <param name="prompt">The prompt to guide the image generation.</param>
27+
/// <param name="originalImages">The original images to base edits on; may be <see langword="null"/>.</param>
28+
public ImageGenerationRequest(string prompt, IEnumerable<AIContent>? originalImages)
29+
{
30+
Prompt = prompt;
31+
OriginalImages = originalImages;
32+
}
33+
34+
/// <summary>Gets or sets the prompt to guide the image generation.</summary>
35+
public string? Prompt { get; set; }
36+
37+
/// <summary>
38+
/// Gets or sets the original images to base edits on.
39+
/// </summary>
40+
/// <remarks>
41+
/// If this property is set, the request will behave as an image edit operation.
42+
/// If this property is <see langword="null"/> or empty, the request will behave as a new image generation operation.
43+
/// </remarks>
44+
public IEnumerable<AIContent>? OriginalImages { get; set; }
45+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Diagnostics.CodeAnalysis;
6+
using System.Text.Json.Serialization;
7+
8+
#pragma warning disable EA0011 // Consider removing unnecessary conditional access operators
9+
10+
namespace Microsoft.Extensions.AI;
11+
12+
/// <summary>Represents the result of an image generation request.</summary>
13+
[Experimental("MEAI001")]
14+
public class ImageGenerationResponse
15+
{
16+
/// <summary>The content items in the generated image response; created lazily by <see cref="Contents"/> when <see langword="null"/>.</summary>
17+
private IList<AIContent>? _contents;
18+
19+
/// <summary>Initializes a new instance of the <see cref="ImageGenerationResponse"/> class.</summary>
20+
[JsonConstructor]
21+
public ImageGenerationResponse()
22+
{
23+
}
24+
25+
/// <summary>Initializes a new instance of the <see cref="ImageGenerationResponse"/> class.</summary>
26+
/// <param name="contents">
/// The contents for this response. May be <see langword="null"/>, in which case <see cref="Contents"/>
/// creates an empty list on first access. The supplied list is stored as-is, not copied.
/// </param>
27+
public ImageGenerationResponse(IList<AIContent>? contents)
28+
{
29+
_contents = contents;
30+
}
31+
32+
/// <summary>Gets or sets the raw representation of the image generation response from an underlying implementation.</summary>
33+
/// <remarks>
34+
/// If an <see cref="ImageGenerationResponse"/> is created to represent some underlying object from another object
35+
/// model, this property can be used to store that original object. This can be useful for debugging or
36+
/// for enabling a consumer to access the underlying object model if needed.
37+
/// </remarks>
38+
[JsonIgnore]
39+
public object? RawRepresentation { get; set; }
40+
41+
/// <summary>
42+
/// Gets or sets the generated content items. Content will typically be DataContent for
43+
/// images streamed from the generator or UriContent for remotely hosted images, but may also
44+
/// be provider specific content types that represent the generated images.
45+
/// </summary>
/// <remarks>
/// The getter never returns <see langword="null"/>: if no list is currently stored, a new empty list is
/// created, stored, and returned. The setter accepts <see langword="null"/>, which clears the stored list
/// so that the next read creates a new empty one.
/// </remarks>
46+
[AllowNull]
47+
public IList<AIContent> Contents
48+
{
49+
get => _contents ??= [];
50+
set => _contents = value;
51+
}
52+
}

0 commit comments

Comments
 (0)