This section describes how to train a model by calling ModelArts APIs.
The process of creating a training job using TensorFlow is as follows:
URI: GET https://{ma_endpoint}/v1/{project_id}/job/resource-specs?job_type=train
Request header: X-Auth-Token: MIIZmgYJKoZIhvcNAQcCoIIZizCCGYcCAQExDTALBglghkgBZQMEAgEwgXXXXXX...
{
"specs": [
......
{
"spec_id": 7,
"core": "2",
"cpu": "8",
"gpu_num": 0,
"gpu_type": "",
"spec_code": "modelarts.vm.cpu.2u",
"unit_num": 1,
"max_num": 1,
"storage": "",
"interface_type": 1,
"no_resource": false
},
{
"spec_id": 27,
"core": "8",
"cpu": "32",
"gpu_num": 0,
"gpu_type": "",
"spec_code": "modelarts.vm.cpu.8u",
"unit_num": 1,
"max_num": 1,
"storage": "",
"interface_type": 1,
"no_resource": false
}
],
"is_success": true,
"spec_total_count": 5
}
URI: GET https://{ma_endpoint}/v1/{project_id}/job/ai-engines?job_type=train
Request header: X-Auth-Token: MIIZmgYJKoZIhvcNAQcCoIIZizCCGYcCAQExDTALBglghkgBZQMEAgEwgXXXXXX...
Set the placeholder parameters in braces ({ma_endpoint} and {project_id}) based on site requirements.
{
"engines": [
{
"engine_type": 13,
"engine_name": "Ascend-Powered-Engine",
"engine_id": 130,
"engine_version": "TF-1.15-python3.7-aarch64"
},
......
{
"engine_type": 1,
"engine_name": "TensorFlow",
"engine_id": 3,
"engine_version": "TF-1.8.0-python2.7"
},
{
"engine_type": 1,
"engine_name": "TensorFlow",
"engine_id": 4,
"engine_version": "TF-1.8.0-python3.6"
},
......
{
"engine_type": 9,
"engine_name": "XGBoost-Sklearn",
"engine_id": 100,
"engine_version": "XGBoost-0.80-Sklearn-0.18.1-python3.6"
}
],
"is_success": true
}
Select the engine for the training job based on engine_name and engine_version, and record its engine_id. This section uses the TensorFlow engine with engine_version TF-1.8.0-python3.6, whose engine_id is 4, as an example.
URI: POST https://{ma_endpoint}/v1/{project_id}/training-jobs
{
"job_name": "jobtest_TF",
"job_desc": "Use TensorFlow to recognize handwritten digits.",
"config": {
"worker_server_num": 1,
"parameter": [],
"flavor": {
"code": "modelarts.vm.cpu.8u"
},
"train_url": "/test-modelarts/mnist-model/output/",
"engine_id": 4,
"app_url": "/test-modelarts/mnist-tensorflow-code/",
"boot_file_url": "/test-modelarts/mnist-tensorflow-code/train_mnist_tf.py",
"data_source": [
{
"type": "obs",
"data_url": "/test-modelarts/dataset-mnist/"
}
]
},
"notification": {
"topic_urn": "",
"events": []
},
"workspace_id": "0"
}
{
"version_name": "V0001",
"job_name": "jobtest_TF",
"create_time": 1609121837000,
"job_id": 567524,
"resource_id": "jobaedef089",
"version_id": 1108482,
"is_success": true,
"status": 1
}
URI: GET https://{ma_endpoint}/v1/{project_id}/training-jobs/{job_id}/versions/{version_id}
Request header: X-Auth-Token: MIIZmgYJKoZIhvcNAQcCoIIZizCCGYcCAQExDTALBglghkgBZQMEAgEwgXXXXXX...
{
"dataset_name": null,
"duration": 1326,
"spec_code": "modelarts.vm.cpu.8u",
"parameter": [],
"start_time": 1609121913000,
"model_outputs": [],
"engine_name": "TensorFlow",
"error_result": null,
"gpu_type": "",
"user_frame_image": null,
"gpu": null,
"dataset_id": null,
"nas_mount_path": null,
"task_summary": {},
"max_num": 1,
"model_metric_list": "{}",
"is_zombie": null,
"flavor_code": "modelarts.vm.cpu.8u",
"gpu_num": 0,
"train_url": "/test-modelarts/mnist-model/output/",
"engine_type": 1,
"job_name": "jobtest_TF",
"nas_type": "efs",
"outputs": null,
"job_id": 567524,
"data_url": "/test-modelarts/dataset-mnist/",
"log_url": null,
"boot_file_url": "/test-modelarts/mnist-tensorflow-code/train_mnist_tf.py",
"volumes": null,
"dataset_version_id": null,
"algorithm_id": null,
"worker_server_num": 1,
"pool_type": "SYSTEM_DEFINED",
"autosearch_config": null,
"job_desc": "Use TensorFlow to recognize handwritten digits.",
"inputs": null,
"model_id": null,
"dataset_version_name": null,
"pool_name": "hec-train-pub-cpu",
"engine_version": "TF-1.8.0-python3.6",
"system_metric_list": {
"recvBytesRate": [
"0",
"0"
],
"cpuUsage": [
"0",
"0"
],
"sendBytesRate": [
"0",
"0"
],
"memUsage": [
"0",
"0"
],
"gpuUtil": [
"0",
"0"
],
"gpuMemUsage": [
"0",
"0"
],
"interval": 1,
"diskWriteRate": [
"0",
"0"
],
"diskReadRate": [
"0",
"0"
]
},
"retrain_model_id": null,
"version_name": "V0001",
"pod_version": "1.8.0-cp36",
"engine_id": 4,
"status": 10,
"cpu": "32",
"user_image_url": null,
"spec_id": 27,
"is_success": true,
"storage": "",
"nas_share_addr": null,
"version_id": 1108482,
"no_resource": false,
"user_command": null,
"resource_id": "jobaedef089",
"core": "8",
"npu_info": null,
"app_url": "/test-modelarts/mnist-tensorflow-code/",
"data_source": [
{
"type": "obs",
"data_url": "/test-modelarts/dataset-mnist/"
}
],
"pre_version_id": null,
"create_time": 1609121837000,
"job_type": 1,
"pool_id": "pool7d1e384a"
}
The response contains the version details of the training job. A status value of 10 indicates that the training job has finished running.
URI: GET https://{ma_endpoint}/v1/{project_id}/training-jobs/{job_id}/versions/{version_id}/log/file-names
Request header: X-Auth-Token: MIIZmgYJKoZIhvcNAQcCoIIZizCCGYcCAQExDTALBglghkgBZQMEAgEwgXXXXXX...
Set the placeholder parameters in braces ({ma_endpoint}, {project_id}, {job_id}, and {version_id}) based on site requirements.
{
"is_success": true,
"log_file_list": [
"job-jobtest-tf.0"
]
}
Only the log file named job-jobtest-tf.0 is available.
URI: GET https://{ma_endpoint}/v1/{project_id}/training-jobs/{job_id}/versions/{version_id}/aom-log?log_file=job-jobtest-tf.0&lines=8&order=desc
Request header: X-Auth-Token: MIIZmgYJKoZIhvcNAQcCoIIZizCCGYcCAQExDTALBglghkgBZQMEAgEwgXXXXXX...
{
"start_line": "1609121886518240330",
"lines": 8,
"is_success": true,
"end_line": "1609121900042593083",
"content": "Done exporting!\n\n[Modelarts Service Log]Training completed.\n\n[ModelArts Service Log]modelarts-pipe: will create log file /tmp/log/jobtest_TF.log\n\n[ModelArts Service Log]modelarts-pipe: will create log file /tmp/log/jobtest_TF.log\n\n[ModelArts Service Log]modelarts-pipe: will write log file /tmp/log/jobtest_TF.log\n\n[ModelArts Service Log]modelarts-pipe: param for max log length: 1073741824\n\n[ModelArts Service Log]modelarts-pipe: param for whether exit on overflow: 0\n\n[ModelArts Service Log]modelarts-pipe: total length: 23303\n"
}
URI: DELETE https://{ma_endpoint}/v1/{project_id}/training-jobs/{job_id}
Request header: X-Auth-Token: MIIZmgYJKoZIhvcNAQcCoIIZizCCGYcCAQExDTALBglghkgBZQMEAgEwgXXXXXX...
Set the placeholder parameters in braces ({ma_endpoint}, {project_id}, and {job_id}) based on site requirements.
{
"is_success": true
}