- Notifications
You must be signed in to change notification settings - Fork 5.9k
/
Copy pathconvert_to_imagefolder.py
32 lines (26 loc) · 1016 Bytes
/
convert_to_imagefolder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""Write a ``metadata.jsonl`` index for a folder of image/caption pairs.

Every ``<stem>.txt`` file directly inside ``--path`` is treated as a caption
and paired with the non-caption file in the same folder that shares its stem.
One JSON object per pair is written to ``<path>/metadata.jsonl`` with the keys
``file_name`` (the paired file's name) and the caption column name (the
caption file's text, unmodified).
"""

import argparse
import json
import pathlib

# Name of the generated index; it is also skipped when looking for images so a
# rerun never pairs a caption with the previous run's output.
METADATA_FILE_NAME = "metadata.jsonl"


def _parse_args(argv=None):
    """Parse command-line options; ``argv=None`` means ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--path",
        type=str,
        required=True,
        help="Path to folder with image-text pairs.",
    )
    parser.add_argument("--caption_column", type=str, default="prompt", help="Name of caption column.")
    return parser.parse_args(argv)


def _pair_captions_with_images(folder):
    """Return ``(caption_path, image_path)`` pairs found in *folder*, sorted by caption.

    A file is an image candidate when it is a regular file, is not a ``.txt``
    caption and is not the generated metadata file. Captions without a
    matching candidate are dropped. If several candidates share a stem, the
    first one in sorted order is used so the result is deterministic.
    """
    captions = sorted(folder.glob("*.txt"))
    caption_set = set(captions)

    images_by_stem = {}
    for candidate in sorted(folder.glob("*")):
        if candidate in caption_set or candidate.name == METADATA_FILE_NAME:
            continue
        if not candidate.is_file():
            # Sub-directories cannot be referenced as an image file.
            continue
        images_by_stem.setdefault(candidate.stem, candidate)

    return [(caption, images_by_stem[caption.stem]) for caption in captions if caption.stem in images_by_stem]


def main(argv=None):
    """Generate ``metadata.jsonl`` inside the folder given by ``--path``.

    Args:
        argv: Optional list of command-line arguments. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        RuntimeError: If ``--path`` does not exist or is not a directory.
    """
    args = _parse_args(argv)

    path = pathlib.Path(args.path)
    if not path.exists():
        raise RuntimeError(f"`--path` '{args.path}' does not exist.")
    if not path.is_dir():
        raise RuntimeError(f"`--path` '{args.path}' is not a directory.")

    # Resolve all pairs before opening the output file so the freshly created
    # metadata file can never show up in the directory listing being paired.
    caption_image = _pair_captions_with_images(path)

    metadata = path.joinpath(METADATA_FILE_NAME)
    with metadata.open("w", encoding="utf-8") as f:
        for caption, image in caption_image:
            caption_text = caption.read_text(encoding="utf-8")
            json.dump({"file_name": image.name, args.caption_column: caption_text}, f)
            f.write("\n")


if __name__ == "__main__":
    main()