| # Florence2 Image Annotator |
|
|
| This is a custom block designed to annotate images via text prompts using the [Florence2](https://huggingface.co/microsoft/Florence-2-large) model. The model can be used as a processor to generate inpainting masks or bounding box annotations. |
|
|
|
|
| # How to use |
|
|
| ```python |
| import torch |
| from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks |
| from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS |
| from diffusers.utils import load_image |
| |
| # fetch the Florence2 image annotator block that will create our mask |
| image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence2-image-annotator", trust_remote_code=True) |
| |
| my_blocks = INPAINT_BLOCKS.copy() |
| # insert the annotation block before the image encoding step |
| my_blocks.insert("image_annotator", image_annotator_block, 1) |
| |
| # build the sequential inpainting blocks from the modified block dictionary (now including the annotator) |
| blocks = SequentialPipelineBlocks.from_blocks_dict(my_blocks) |
| |
| repo_id = "diffusers-internal-dev/modular-sdxl-inpainting" |
| pipe = blocks.init_pipeline(repo_id) |
| pipe.load_default_components(torch_dtype=torch.float16, device_map="cuda", trust_remote_code=True) |
| |
| image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true") |
| image = image.resize((1024, 1024)) |
| |
| prompt = ["A red car"] |
| annotation_task = "<REFERRING_EXPRESSION_SEGMENTATION>" |
| annotation_prompt = ["the car"] |
| |
| output = pipe( |
| prompt=prompt, |
| image=image, |
| annotation_task=annotation_task, |
| annotation_prompt=annotation_prompt, |
| annotation_output_type="mask_image", |
| num_inference_steps=35, |
| guidance_scale=7.5, |
| strength=0.95, |
| output_type="pil", |
| ) |
| ``` |
|
|