Skip to content

Commit 77eb75c

Browse files
committed
Java: MultiDataSource 导出 CVAuto 数据集新增支持按 ratio 参数对应测试集比例切分数据,优化目录结构
1 parent 9df62d3 commit 77eb75c

File tree

3 files changed: +46 −25 lines changed

APIJSON-Java-Server/APIJSONBoot-MultiDataSource/src/main/java/apijson/DatasetUtil.java

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ public static void main(String[] args) {
6868
System.out.println("\nGenerating dataset from JSONObject data...");
6969
List<JSONObject> sampleData = createSampleJSONObjectData();
7070
Set<TaskType> jsonTasks = new HashSet<>(Collections.singletonList(TaskType.DETECTION));
71-
generate("./output/json_dataset", jsonTasks, sampleData);
71+
generate(sampleData, jsonTasks, "./output/detection_dataset/", "train");
7272

7373
} catch (IOException e) {
7474
e.printStackTrace();
@@ -513,12 +513,13 @@ public static void generate(String outputDir, Set<TaskType> tasks) throws IOExce
513513

514514
/**
515515
* 从 List<JSONObject> 数据生成 COCO 数据集
516-
* @param outputDir 输出目录
517-
* @param tasks 任务类型集合
518516
* @param data 包含图片和标注信息的 JSONObject 列表
517+
* @param tasks 任务类型集合
518+
* @param outputDir 输出目录
519+
* @param part train/val
519520
* @throws IOException
520521
*/
521-
public static void generate(String outputDir, Set<TaskType> tasks, List<JSONObject> data) throws IOException {
522+
public static void generate(List<JSONObject> data, Set<TaskType> tasks, String outputDir, String part) throws IOException {
522523
if (data == null || data.isEmpty()) {
523524
throw new IllegalArgumentException("Data list cannot be null or empty");
524525
}
@@ -605,13 +606,15 @@ public static void generate(String outputDir, Set<TaskType> tasks, List<JSONObje
605606
CocoDataset cocoDataset = builder.build();
606607

607608
// 为不同任务生成不同的文件名
608-
String taskName = tasks.iterator().next().toString().toLowerCase();
609-
String outputJsonPath = Paths.get(outputDir, "annotations", "instances_" + taskName + ".json").toString();
609+
String[] keys = StringUtil.split(outputDir, "/");
610+
String taskName = StringUtil.isNotEmpty(part) ? part : (keys != null && keys.length >= 1 ? keys[keys.length - 1] : tasks.iterator().next().toString().toLowerCase());
610611

612+
String outputJsonPath = Paths.get(outputDir, "annotations", taskName + ".json").toString();
611613
writeToFile(cocoDataset, outputJsonPath);
614+
writeToFile(cocoDataset, Paths.get(outputDir, taskName + ".json").toString());
612615

613-
// 复制图片文件到指定目录 outputDir/images/
614-
copyImagesToDirectory(cocoDataset.getImages(), outputDir + "/images/");
616+
// 复制图片文件到指定目录 outputDir/images/ train/val
617+
copyImagesToDirectory(cocoDataset.getImages(), outputDir + taskName + "/");
615618

616619
System.out.println("Successfully generated dataset from JSONObject data at: " + outputDir);
617620
}

APIJSON-Java-Server/APIJSONBoot-MultiDataSource/src/main/java/apijson/boot/FileController.java

Lines changed: 34 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -456,25 +456,29 @@ public ResponseEntity<Object> downloadCVReport(@PathVariable(name = "id") String
456456
public ResponseEntity<Object> downloadDataset(
457457
@PathVariable("id") String idStr,
458458
@RequestParam(name = "type", required = false) String type,
459-
@RequestParam(name = "datasetName", required = false) String datasetName
459+
@RequestParam(name = "ratio", required = false) String ratioStr,
460+
@RequestParam(name = "name", required = false) String datasetName
460461
) throws IOException {
461462
try {
462-
// 参数验证
463+
long repOrDocId = Long.parseLong(idStr);
464+
if (repOrDocId <= 0) {
465+
throw new IllegalArgumentException("id 必须为 > 0 的 reportId 或 documentId 有效整数!");
466+
}
467+
463468
if (StringUtil.isNotEmpty(type)) {
464469
validateCocoType(type);
465470
} else {
466471
type = "";
467472
}
468473

469-
String dataset = StringUtil.isEmpty(datasetName) ? type + "_dataset" : datasetName;
470-
String exportDir = fileUploadRootDir + dataset + "/";
471-
472-
long repOrDocId = Long.parseLong(idStr);
473-
if (repOrDocId <= 0) {
474-
throw new IllegalArgumentException("id 必须为 > 0 的 reportId 或 documentId 有效整数!");
474+
int ratio = StringUtil.isEmpty(ratioStr) ? 20 : Integer.parseInt(ratioStr);
475+
if (ratio < 0 || ratio > 100) {
476+
throw new IllegalArgumentException("测试集比例 ratio 必须为 0 ~ 100 范围内的有效整数!");
475477
}
476478

477-
String name = "CVAuto_" + dataset + repOrDocId + ".zip";
479+
String dataset = StringUtil.isNotEmpty(datasetName) ? datasetName : "CVAuto_" + (StringUtil.isNotEmpty(type) ? type + "_" : "") + "dataset_" + repOrDocId;
480+
String exportDir = fileUploadRootDir + dataset + "/";
481+
String name = dataset + ".zip";
478482
String path = fileUploadRootDir + name;
479483

480484
File file = new File(path);
@@ -499,8 +503,8 @@ public ResponseEntity<Object> downloadDataset(
499503

500504
{ // [] <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
501505
JSONObject item = new JSONObject();
502-
//item.put("count", 0);
503-
item.put("count", 3);
506+
item.put("count", 0);
507+
//item.put("count", 3);
504508
item.put("join", "@/TestRecord");
505509

506510
{ // Random <<<<<<<<<<<<<<<<<<<<<<<<<<<<<
@@ -545,17 +549,29 @@ public ResponseEntity<Object> downloadDataset(
545549
//long documentId = lastTr == null ? 0 : lastTr.getLongValue("documentId");
546550
//long randomId = lastTr == null ? 0 : lastTr.getLongValue("randomId");
547551
if (reportId != repOrDocId) {
548-
name = "CVAuto_" + dataset + (reportId > 0 ? reportId : repOrDocId + "_last") + ".zip";
552+
dataset = StringUtil.isNotEmpty(datasetName) ? datasetName : "CVAuto_" + (StringUtil.isNotEmpty(type) ? type + "_" : "") + "dataset_" + (reportId > 0 ? reportId : repOrDocId + "_last");
553+
exportDir = fileUploadRootDir + dataset + "/";
554+
name = dataset + ".zip";
549555
path = fileUploadRootDir + name;
550556
}
551557

552558
JSONArray array = response.getJSONArray("[]");
553559

554-
List<JSONObject> list = new ArrayList<>();
560+
List<JSONObject> trainList = new ArrayList<>();
561+
List<JSONObject> validList = new ArrayList<>();
555562
if (array != null) {
556-
for (int i = 0; i < array.size(); i++) {
563+
int len = array.size();
564+
for (int i = 0; i < len; i++) {
557565
JSONObject item = array.getJSONObject(i);
558-
list.add(item);
566+
if (item == null || item.isEmpty()) {
567+
continue;
568+
}
569+
570+
if (ratio <= 0 || ratio <= 100 - 100.0*i/len) {
571+
trainList.add(item);
572+
} else {
573+
validList.add(item);
574+
}
559575

560576
//JSONObject random = item == null ? null : item.getJSONObject("Random");
561577
//JSONObject testRecord = item == null ? null : item.getJSONObject("TestRecord");
@@ -586,7 +602,9 @@ public ResponseEntity<Object> downloadDataset(
586602
//generateCocoDatasetFromApiJson(exportDir, type, dataset, list);
587603

588604
Set<DatasetUtil.TaskType> detectionTasks = new HashSet<>(Collections.singletonList(DatasetUtil.TaskType.DETECTION));
589-
DatasetUtil.generate(exportDir, detectionTasks, list);
605+
606+
DatasetUtil.generate(trainList, detectionTasks, exportDir, "train");
607+
DatasetUtil.generate(validList, detectionTasks, exportDir, "val");
590608

591609
createZipFromDirectory(exportDir, path);
592610

APIJSON-Java-Server/APIJSONBoot-MultiDataSource/src/main/resources/static/cv/js/main.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2949,7 +2949,7 @@ https://github.com/Tencent/APIJSON/issues
29492949
}
29502950
else if (this.isRandomShow && this.isRandomListShow) {
29512951
var id = this.reportId || this.getCurrentDocumentId()
2952-
window.open(this.server + '/download/dataset/' + id)
2952+
window.open(this.server + '/download/dataset/' + id + "?ratio=20")
29532953
}
29542954
else if (this.view == 'markdown' || this.view == 'output') { //model
29552955
var clazz = StringUtil.trim(this.exTxt.name)

0 commit comments

Comments
 (0)