59 lines
2.6 KiB
Python
59 lines
2.6 KiB
Python
from tqdm import tqdm
|
|
|
|
from ultralytics.yolo.utils.checks import check_requirements
|
|
from ultralytics.yolo.utils.downloads import download
|
|
from ultralytics.yolo.utils.ops import xyxy2xywhn
|
|
|
|
import numpy as np
|
|
from pathlib import Path
|
|
|
|
check_requirements(('pycocotools>=2.0',))
|
|
from pycocotools.coco import COCO
|
|
|
|
# Make Directories
|
|
dir = Path(yaml['path']) # dataset root dir
|
|
for p in 'images', 'labels':
|
|
(dir / p).mkdir(parents=True, exist_ok=True)
|
|
for q in 'train', 'val':
|
|
(dir / p / q).mkdir(parents=True, exist_ok=True)
|
|
|
|
# Train, Val Splits
|
|
for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
|
|
print(f"Processing {split} in {patches} patches ...")
|
|
images, labels = dir / 'images' / split, dir / 'labels' / split
|
|
|
|
# Download
|
|
url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
|
|
if split == 'train':
|
|
download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir) # annotations json
|
|
download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, threads=8)
|
|
elif split == 'val':
|
|
download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir) # annotations json
|
|
download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, threads=8)
|
|
download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, threads=8)
|
|
|
|
# Move
|
|
for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
|
|
f.rename(images / f.name) # move to /images/{split}
|
|
|
|
# Labels
|
|
coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
|
|
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
|
|
for cid, cat in enumerate(names):
|
|
catIds = coco.getCatIds(catNms=[cat])
|
|
imgIds = coco.getImgIds(catIds=catIds)
|
|
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
|
|
width, height = im["width"], im["height"]
|
|
path = Path(im["file_name"]) # image filename
|
|
try:
|
|
with open(labels / path.with_suffix('.txt').name, 'a') as file:
|
|
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
|
|
for a in coco.loadAnns(annIds):
|
|
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
|
|
xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4)
|
|
x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped
|
|
file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
|
|
except Exception as e:
|
|
print(e)
|
|
|