diff --git a/docs/modelarts/best-practice/ALL_META.TXT.json b/docs/modelarts/best-practice/ALL_META.TXT.json new file mode 100644 index 00000000..e510b10b --- /dev/null +++ b/docs/modelarts/best-practice/ALL_META.TXT.json @@ -0,0 +1,479 @@ +[ + { + "uri":"modelarts_10_0150.html", + "node_id":"en-us_topic_0000001679516496.xml", + "product_code":"", + "code":"1", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"", + "kw":"Permissions Management", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Permissions Management", + "githuburl":"" + }, + { + "uri":"modelarts_24_0078.html", + "node_id":"en-us_topic_0000001679679248.xml", + "product_code":"modelarts", + "code":"2", + "des":"ModelArts allows you to configure fine-grained permissions for refined management of resources and permissions. This is commonly used by large enterprises, but it is comp", + "doc_type":"usermanual", + "kw":"Basic Concepts,Permissions Management,Best Practices", + "search_title":"", + "metedata":[ + { + "prodname":"modelarts", + "opensource":"true", + "documenttype":"usermanual", + "IsBot":"Yes", + "IsMulti":"Yes" + } + ], + "title":"Basic Concepts", + "githuburl":"" + }, + { + "uri":"modelarts_24_0079.html", + "node_id":"en-us_topic_0000001727798129.xml", + "product_code":"", + "code":"3", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"", + "kw":"Permission Management Mechanisms", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Permission Management Mechanisms", + "githuburl":"" + }, + { + "uri":"modelarts_24_0080.html", + "node_id":"en-us_topic_0000001679679160.xml", + "product_code":"", + "code":"4", + "des":"This section describes the IAM permission configurations for all ModelArts functions.If no fine-grained authorization policy is configured for a user created by the admin", + "doc_type":"", + "kw":"IAM,Permission Management Mechanisms,Best Practices", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"IAM", + "githuburl":"" + }, + { + "uri":"modelarts_24_0081.html", + "node_id":"en-us_topic_0000001727718241.xml", + "product_code":"", + "code":"5", + "des":"Function Dependency PoliciesWhen using ModelArts to develop algorithms or manage training jobs, you are required to use other Cloud services. For example, before submitti", + "doc_type":"", + "kw":"Agencies and Dependencies,Permission Management Mechanisms,Best Practices", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Agencies and Dependencies", + "githuburl":"" + }, + { + "uri":"modelarts_24_0082.html", + "node_id":"en-us_topic_0000001679679168.xml", + "product_code":"", + "code":"6", + "des":"ModelArts allows you to create multiple workspaces to develop algorithms and manage and deploy models for different service objectives. 
In this way, the development outpu", + "doc_type":"", + "kw":"Workspace,Permission Management Mechanisms,Best Practices", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Workspace", + "githuburl":"" + }, + { + "uri":"modelarts_24_0084.html", + "node_id":"en-us_topic_0000001727718221.xml", + "product_code":"", + "code":"7", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"", + "kw":"Configuration Practices in Typical Scenarios", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Configuration Practices in Typical Scenarios", + "githuburl":"" + }, + { + "uri":"modelarts_24_0085.html", + "node_id":"en-us_topic_0000001727798133.xml", + "product_code":"", + "code":"8", + "des":"Certain ModelArts functions require access to Object Storage Service (OBS), Software Repository for Container (SWR), and Intelligent EdgeFabric (IEF). Before using ModelA", + "doc_type":"", + "kw":"Assigning Permissions to Individual Users for Using ModelArts,Configuration Practices in Typical Sce", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Assigning Permissions to Individual Users for Using ModelArts", + "githuburl":"" + }, + { + "uri":"modelarts_24_0086.html", + "node_id":"en-us_topic_0000001679679164.xml", + "product_code":"", + "code":"9", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"", + "kw":"Assigning Basic Permissions for Using ModelArts", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Assigning Basic Permissions for Using ModelArts", + "githuburl":"" + }, + { + "uri":"modelarts_10_0062.html", + "node_id":"en-us_topic_0000001727798125.xml", + "product_code":"", + "code":"10", + "des":"Certain ModelArts functions require the permission to access other services. This section describes how to assign specific permissions to IAM users when they use ModelArt", + "doc_type":"", + "kw":"Scenarios,Assigning Basic Permissions for Using ModelArts,Best Practices", + "search_title":"", + "metedata":[ + { + "opensource":"true", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Scenarios", + "githuburl":"" + }, + { + "uri":"modelarts_24_0089.html", + "node_id":"en-us_topic_0000001679838896.xml", + "product_code":"", + "code":"11", + "des":"Multiple IAM users can be created under a tenant user, and the permissions of the IAM users are managed by group. This section describes how to create a user group and IA", + "doc_type":"", + "kw":"Step 1 Create a User Group and Add Users to the User Group,Assigning Basic Permissions for Using Mod", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Step 1 Create a User Group and Add Users to the User Group", + "githuburl":"" + }, + { + "uri":"modelarts_24_0090.html", + "node_id":"en-us_topic_0000001727798145.xml", + "product_code":"", + "code":"12", + "des":"An IAM user can use cloud services such as ModelArts and OBS only after they are assigned with permissions from the tenant user. 
This section describes how to assign the ", + "doc_type":"", + "kw":"Step 2 Assigning Permissions for Using Cloud Services,Assigning Basic Permissions for Using ModelArt", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Step 2 Assigning Permissions for Using Cloud Services", + "githuburl":"" + }, + { + "uri":"modelarts_24_0091.html", + "node_id":"en-us_topic_0000001679838984.xml", + "product_code":"", + "code":"13", + "des":"After assigning IAM permissions, configure ModelArts access authorization for IAM users on the ModelArts page so that ModelArts can access dependent services such as OBS,", + "doc_type":"", + "kw":"Step 3 Configure Agent-based ModelArts Access Authorization for the User,Assigning Basic Permissions", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Step 3 Configure Agent-based ModelArts Access Authorization for the User", + "githuburl":"" + }, + { + "uri":"modelarts_24_0092.html", + "node_id":"en-us_topic_0000001727798137.xml", + "product_code":"", + "code":"14", + "des":"It takes 15 to 30 minutes for the permissions configured in 4 to take effect. Therefore, wait for 30 minutes after the configuration and then verify the configuration.Log", + "doc_type":"", + "kw":"Step 4 Verify User Permissions,Assigning Basic Permissions for Using ModelArts,Best Practices", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Step 4 Verify User Permissions", + "githuburl":"" + }, + { + "uri":"modelarts_24_0093.html", + "node_id":"en-us_topic_0000001727718225.xml", + "product_code":"", + "code":"15", + "des":"In small- and medium-sized teams, administrators need to globally control ModelArts resources, and developers only need to focus on their own instances. 
By default, a dev", + "doc_type":"", + "kw":"Separately Assigning Permissions to Administrators and Developers,Configuration Practices in Typical", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Separately Assigning Permissions to Administrators and Developers", + "githuburl":"" + }, + { + "uri":"modelarts_24_0095.html", + "node_id":"en-us_topic_0000001679679152.xml", + "product_code":"", + "code":"16", + "des":"Any IAM user granted with the listAllNotebooks and listUsers permissions can click View all on the notebook page to view the instances of all users in the current IAM pro", + "doc_type":"", + "kw":"Viewing the Notebook Instances of All IAM Users Under One Tenant Account,Configuration Practices in ", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Viewing the Notebook Instances of All IAM Users Under One Tenant Account", + "githuburl":"" + }, + { + "uri":"modelarts_24_0096.html", + "node_id":"en-us_topic_0000001727798225.xml", + "product_code":"modelarts", + "code":"17", + "des":"You can use Cloud Shell provided by the ModelArts console to log in to a running training container.You can use Cloud Shell to log in to a running training container usin", + "doc_type":"usermanual", + "kw":"Logging In to a Training Container Using Cloud Shell,Configuration Practices in Typical Scenarios,Be", + "search_title":"", + "metedata":[ + { + "prodname":"modelarts", + "IsMulti":"No", + "IsBot":"No", + "opensource":"true", + "documenttype":"usermanual" + } + ], + "title":"Logging In to a Training Container Using Cloud Shell", + "githuburl":"" + }, + { + "uri":"modelarts_24_0097.html", + "node_id":"en-us_topic_0000001727718321.xml", + "product_code":"", + "code":"18", + "des":"This section describes how to control the ModelArts permissions of a user so that the user is not allowed to use a public resource pool to create training jobs, create no", + "doc_type":"", + "kw":"Prohibiting a User from Using a Public Resource Pool,Configuration Practices 
in Typical Scenarios,Be", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Prohibiting a User from Using a Public Resource Pool", + "githuburl":"" + }, + { + "uri":"modelarts_10_0001.html", + "node_id":"en-us_topic_0000001727355869.xml", + "product_code":"", + "code":"19", + "des":"ModelArts provides ExeML for service developers, freeing you from model development and parameter tuning. With ExeML, you can finish an AI development project in just thr", + "doc_type":"", + "kw":"Huawei Cloud Mascot Detection (Using ExeML for Object Detection),Best Practices", + "search_title":"", + "metedata":[ + { + "opensource":"true" + } + ], + "title":"Huawei Cloud Mascot Detection (Using ExeML for Object Detection)", + "githuburl":"" + }, + { + "uri":"modelarts_10_0002.html", + "node_id":"en-us_topic_0000001727435941.xml", + "product_code":"", + "code":"20", + "des":"Banks often predict whether customers would be interested in a time deposit based on their characteristics, including the age, work type, marital status, education backgr", + "doc_type":"", + "kw":"Bank Deposit Prediction (Using ExeML for Predictive Analytics),Best Practices", + "search_title":"", + "metedata":[ + { + "opensource":"true", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Bank Deposit Prediction (Using ExeML for Predictive Analytics)", + "githuburl":"" + }, + { + "uri":"modelarts_10_0080.html", + "node_id":"en-us_topic_0000001679356792.xml", + "product_code":"", + "code":"21", + "des":"This section describes how to modify a local custom algorithm to train and deploy models on ModelArts.This case describes how to use PyTorch 1.8 to recognize handwritten ", + "doc_type":"", + "kw":"Using a Custom Algorithm to Build a Handwritten Digit Recognition Model,Best Practices", + "search_title":"", + "metedata":[ + { + "opensource":"true", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Using a Custom Algorithm to Build a Handwritten Digit Recognition Model", + "githuburl":"" + }, + { + 
"uri":"develop-modelarts-0143.html", + "node_id":"en-us_topic_0000001679516552.xml", + "product_code":"modelarts", + "code":"22", + "des":"This section describes how to create an image and use it for training on ModelArts. The AI engine used in the image is horovod_0.22.1-pytorch_1.8.1, and the resources use", + "doc_type":"usermanual", + "kw":"Example: Creating a Custom Image for Training (Horovod-PyTorch and GPUs),Best Practices", + "search_title":"", + "metedata":[ + { + "prodname":"modelarts", + "documenttype":"usermanual" + } + ], + "title":"Example: Creating a Custom Image for Training (Horovod-PyTorch and GPUs)", + "githuburl":"" + }, + { + "uri":"develop-modelarts-0144.html", + "node_id":"en-us_topic_0000001679516596.xml", + "product_code":"modelarts", + "code":"23", + "des":"This section describes how to create an image and use it for training on ModelArts. The AI engine used in the image is MindSpore, and the resources used for training are ", + "doc_type":"usermanual", + "kw":"Example: Creating a Custom Image for Training (MindSpore and GPUs),Best Practices", + "search_title":"", + "metedata":[ + { + "prodname":"modelarts", + "documenttype":"usermanual" + } + ], + "title":"Example: Creating a Custom Image for Training (MindSpore and GPUs)", + "githuburl":"" + }, + { + "uri":"develop-modelarts-0145.html", + "node_id":"en-us_topic_0000001727355837.xml", + "product_code":"modelarts", + "code":"24", + "des":"This section describes how to create an image and use it for training on ModelArts. 
The AI engine used in the image is TensorFlow, and the resources used for training are", + "doc_type":"usermanual", + "kw":"Example: Creating a Custom Image for Training (TensorFlow and GPUs),Best Practices", + "search_title":"", + "metedata":[ + { + "prodname":"modelarts", + "documenttype":"usermanual" + } + ], + "title":"Example: Creating a Custom Image for Training (TensorFlow and GPUs)", + "githuburl":"" + }, + { + "uri":"modelarts_10_0072.html", + "node_id":"en-us_topic_0000001727355817.xml", + "product_code":"modelarts", + "code":"25", + "des":"If you want to use an AI engine that is not supported by ModelArts, create a custom image for the engine, import the image to ModelArts, and use the image to create AI ap", + "doc_type":"usermanual", + "kw":"Creating a Custom Image and Using It to Create an AI Application,Best Practices", + "search_title":"", + "metedata":[ + { + "prodname":"modelarts", + "documenttype":"usermanual" + } + ], + "title":"Creating a Custom Image and Using It to Create an AI Application", + "githuburl":"" + }, + { + "uri":"modelarts_04_0203.html", + "node_id":"en-us_topic_0000001679516500.xml", + "product_code":"", + "code":"26", + "des":"This section describes how to enable an inference service to access the Internet.An inference service accesses the Internet in the following scenarios:After an image is i", + "doc_type":"", + "kw":"Enabling an Inference Service to Access the Internet,Best Practices", + "search_title":"", + "metedata":[ + { + + } + ], + "title":"Enabling an Inference Service to Access the Internet", + "githuburl":"" + }, + { + "uri":"modelarts_04_0204.html", + "node_id":"en-us_topic_0000001727355769.xml", + "product_code":"", + "code":"27", + "des":"The end-to-end O&M of ModelArts inference services involves the entire AI process including algorithm development, service O&M, and service running.End-to-End O&M Process", + "doc_type":"", + "kw":"End-to-End O&M of Inference Services,Best Practices", + "search_title":"", + 
"metedata":[ + { + + } + ], + "title":"End-to-End O&M of Inference Services", + "githuburl":"" + } +] \ No newline at end of file diff --git a/docs/modelarts/best-practice/CLASS.TXT.json b/docs/modelarts/best-practice/CLASS.TXT.json new file mode 100644 index 00000000..d68f1f18 --- /dev/null +++ b/docs/modelarts/best-practice/CLASS.TXT.json @@ -0,0 +1,245 @@ +[ + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"", + "title":"Permissions Management", + "uri":"modelarts_10_0150.html", + "doc_type":"", + "p_code":"", + "code":"1" + }, + { + "desc":"ModelArts allows you to configure fine-grained permissions for refined management of resources and permissions. This is commonly used by large enterprises, but it is comp", + "product_code":"modelarts", + "title":"Basic Concepts", + "uri":"modelarts_24_0078.html", + "doc_type":"usermanual", + "p_code":"1", + "code":"2" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"", + "title":"Permission Management Mechanisms", + "uri":"modelarts_24_0079.html", + "doc_type":"", + "p_code":"1", + "code":"3" + }, + { + "desc":"This section describes the IAM permission configurations for all ModelArts functions.If no fine-grained authorization policy is configured for a user created by the admin", + "product_code":"", + "title":"IAM", + "uri":"modelarts_24_0080.html", + "doc_type":"", + "p_code":"3", + "code":"4" + }, + { + "desc":"Function Dependency PoliciesWhen using ModelArts to develop algorithms or manage training jobs, you are required to use other Cloud services. For example, before submitti", + "product_code":"", + "title":"Agencies and Dependencies", + "uri":"modelarts_24_0081.html", + "doc_type":"", + "p_code":"3", + "code":"5" + }, + { + "desc":"ModelArts allows you to create multiple workspaces to develop algorithms and manage and deploy models for different service objectives. In this way, the development outpu", + "product_code":"", + "title":"Workspace", + "uri":"modelarts_24_0082.html", + "doc_type":"", + "p_code":"3", + "code":"6" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"", + "title":"Configuration Practices in Typical Scenarios", + "uri":"modelarts_24_0084.html", + "doc_type":"", + "p_code":"1", + "code":"7" + }, + { + "desc":"Certain ModelArts functions require access to Object Storage Service (OBS), Software Repository for Container (SWR), and Intelligent EdgeFabric (IEF). 
Before using ModelA", + "product_code":"", + "title":"Assigning Permissions to Individual Users for Using ModelArts", + "uri":"modelarts_24_0085.html", + "doc_type":"", + "p_code":"7", + "code":"8" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"", + "title":"Assigning Basic Permissions for Using ModelArts", + "uri":"modelarts_24_0086.html", + "doc_type":"", + "p_code":"7", + "code":"9" + }, + { + "desc":"Certain ModelArts functions require the permission to access other services. This section describes how to assign specific permissions to IAM users when they use ModelArt", + "product_code":"", + "title":"Scenarios", + "uri":"modelarts_10_0062.html", + "doc_type":"", + "p_code":"9", + "code":"10" + }, + { + "desc":"Multiple IAM users can be created under a tenant user, and the permissions of the IAM users are managed by group. This section describes how to create a user group and IA", + "product_code":"", + "title":"Step 1 Create a User Group and Add Users to the User Group", + "uri":"modelarts_24_0089.html", + "doc_type":"", + "p_code":"9", + "code":"11" + }, + { + "desc":"An IAM user can use cloud services such as ModelArts and OBS only after they are assigned with permissions from the tenant user. 
This section describes how to assign the ", + "product_code":"", + "title":"Step 2 Assigning Permissions for Using Cloud Services", + "uri":"modelarts_24_0090.html", + "doc_type":"", + "p_code":"9", + "code":"12" + }, + { + "desc":"After assigning IAM permissions, configure ModelArts access authorization for IAM users on the ModelArts page so that ModelArts can access dependent services such as OBS,", + "product_code":"", + "title":"Step 3 Configure Agent-based ModelArts Access Authorization for the User", + "uri":"modelarts_24_0091.html", + "doc_type":"", + "p_code":"9", + "code":"13" + }, + { + "desc":"It takes 15 to 30 minutes for the permissions configured in 4 to take effect. Therefore, wait for 30 minutes after the configuration and then verify the configuration.Log", + "product_code":"", + "title":"Step 4 Verify User Permissions", + "uri":"modelarts_24_0092.html", + "doc_type":"", + "p_code":"9", + "code":"14" + }, + { + "desc":"In small- and medium-sized teams, administrators need to globally control ModelArts resources, and developers only need to focus on their own instances. 
By default, a dev", + "product_code":"", + "title":"Separately Assigning Permissions to Administrators and Developers", + "uri":"modelarts_24_0093.html", + "doc_type":"", + "p_code":"7", + "code":"15" + }, + { + "desc":"Any IAM user granted with the listAllNotebooks and listUsers permissions can click View all on the notebook page to view the instances of all users in the current IAM pro", + "product_code":"", + "title":"Viewing the Notebook Instances of All IAM Users Under One Tenant Account", + "uri":"modelarts_24_0095.html", + "doc_type":"", + "p_code":"7", + "code":"16" + }, + { + "desc":"You can use Cloud Shell provided by the ModelArts console to log in to a running training container.You can use Cloud Shell to log in to a running training container usin", + "product_code":"modelarts", + "title":"Logging In to a Training Container Using Cloud Shell", + "uri":"modelarts_24_0096.html", + "doc_type":"usermanual", + "p_code":"7", + "code":"17" + }, + { + "desc":"This section describes how to control the ModelArts permissions of a user so that the user is not allowed to use a public resource pool to create training jobs, create no", + "product_code":"", + "title":"Prohibiting a User from Using a Public Resource Pool", + "uri":"modelarts_24_0097.html", + "doc_type":"", + "p_code":"7", + "code":"18" + }, + { + "desc":"ModelArts provides ExeML for service developers, freeing you from model development and parameter tuning. 
With ExeML, you can finish an AI development project in just thr", + "product_code":"", + "title":"Huawei Cloud Mascot Detection (Using ExeML for Object Detection)", + "uri":"modelarts_10_0001.html", + "doc_type":"", + "p_code":"", + "code":"19" + }, + { + "desc":"Banks often predict whether customers would be interested in a time deposit based on their characteristics, including the age, work type, marital status, education backgr", + "product_code":"", + "title":"Bank Deposit Prediction (Using ExeML for Predictive Analytics)", + "uri":"modelarts_10_0002.html", + "doc_type":"", + "p_code":"", + "code":"20" + }, + { + "desc":"This section describes how to modify a local custom algorithm to train and deploy models on ModelArts.This case describes how to use PyTorch 1.8 to recognize handwritten ", + "product_code":"", + "title":"Using a Custom Algorithm to Build a Handwritten Digit Recognition Model", + "uri":"modelarts_10_0080.html", + "doc_type":"", + "p_code":"", + "code":"21" + }, + { + "desc":"This section describes how to create an image and use it for training on ModelArts. The AI engine used in the image is horovod_0.22.1-pytorch_1.8.1, and the resources use", + "product_code":"modelarts", + "title":"Example: Creating a Custom Image for Training (Horovod-PyTorch and GPUs)", + "uri":"develop-modelarts-0143.html", + "doc_type":"usermanual", + "p_code":"", + "code":"22" + }, + { + "desc":"This section describes how to create an image and use it for training on ModelArts. The AI engine used in the image is MindSpore, and the resources used for training are ", + "product_code":"modelarts", + "title":"Example: Creating a Custom Image for Training (MindSpore and GPUs)", + "uri":"develop-modelarts-0144.html", + "doc_type":"usermanual", + "p_code":"", + "code":"23" + }, + { + "desc":"This section describes how to create an image and use it for training on ModelArts. 
The AI engine used in the image is TensorFlow, and the resources used for training are", + "product_code":"modelarts", + "title":"Example: Creating a Custom Image for Training (TensorFlow and GPUs)", + "uri":"develop-modelarts-0145.html", + "doc_type":"usermanual", + "p_code":"", + "code":"24" + }, + { + "desc":"If you want to use an AI engine that is not supported by ModelArts, create a custom image for the engine, import the image to ModelArts, and use the image to create AI ap", + "product_code":"modelarts", + "title":"Creating a Custom Image and Using It to Create an AI Application", + "uri":"modelarts_10_0072.html", + "doc_type":"usermanual", + "p_code":"", + "code":"25" + }, + { + "desc":"This section describes how to enable an inference service to access the Internet.An inference service accesses the Internet in the following scenarios:After an image is i", + "product_code":"", + "title":"Enabling an Inference Service to Access the Internet", + "uri":"modelarts_04_0203.html", + "doc_type":"", + "p_code":"", + "code":"26" + }, + { + "desc":"The end-to-end O&M of ModelArts inference services involves the entire AI process including algorithm development, service O&M, and service running.End-to-End O&M Process", + "product_code":"", + "title":"End-to-End O&M of Inference Services", + "uri":"modelarts_04_0204.html", + "doc_type":"", + "p_code":"", + "code":"27" + } +] \ No newline at end of file diff --git a/docs/modelarts/best-practice/PARAMETERS.txt b/docs/modelarts/best-practice/PARAMETERS.txt new file mode 100644 index 00000000..6da8d5f0 --- /dev/null +++ b/docs/modelarts/best-practice/PARAMETERS.txt @@ -0,0 +1,3 @@ +version="" +language="en-us" +type="" \ No newline at end of file diff --git a/docs/modelarts/best-practice/develop-modelarts-0143.html b/docs/modelarts/best-practice/develop-modelarts-0143.html new file mode 100644 index 00000000..ca646e2b --- /dev/null +++ b/docs/modelarts/best-practice/develop-modelarts-0143.html @@ -0,0 +1,496 @@ + + +

Example: Creating a Custom Image for Training (Horovod-PyTorch and GPUs)

+

This section describes how to create an image and use it for training on ModelArts. The AI engine used in the image is horovod_0.22.1-pytorch_1.8.1, and the resources used for training are GPUs.

+

This section applies only to training jobs of the new version.

+
+

Scenario

In this example, write a Dockerfile to create a custom image on a Linux x86_64 server running Ubuntu 18.04.

+

Create a container image with the following configurations and use the image to create a CPU- or GPU-powered training job on ModelArts:

+ +
+

Procedure

Before using a custom image to create a training job, you need to be familiar with Docker and have development experience.

+
  1. Prerequisites
  2. Step 1 Creating an OBS Bucket and Folder
  3. Step 2 Preparing the Training Script and Uploading It to OBS
  4. Step 3 Preparing a Server
  5. Step 4 Creating a Custom Image
  6. Step 5 Uploading the Image to SWR
  7. Step 6 Creating a Training Job on ModelArts
+
+

Prerequisites

You have registered a Huawei Cloud account. The account is not in arrears or frozen.

+
+

Step 1 Creating an OBS Bucket and Folder

Create a bucket and folders in OBS for storing the sample dataset and training code. Table 1 lists the folders to be created. Replace the bucket name and folder names in the example with actual names.

+

For details about how to create an OBS bucket and folder, see Creating a Bucket and Creating a Folder.

+

Ensure that the OBS directory you use and ModelArts are in the same region.

+ +
+ + + + + + + + + + +
Table 1 Folders to create

Name

+

Description

+

obs://test-modelarts/pytorch/demo-code/

+

Stores the training script.

+

obs://test-modelarts/pytorch/log/

+

Stores training log files.

+
+
+
+

Step 2 Preparing the Training Script and Uploading It to OBS

Obtain training scripts pytorch_synthetic_benchmark.py and run_mpi.sh and upload them to obs://test-modelarts/pytorch/demo-code/ in the OBS bucket.

+

pytorch_synthetic_benchmark.py is as follows:

+
import argparse
+import torch.backends.cudnn as cudnn
+import torch.nn.functional as F
+import torch.optim as optim
+import torch.utils.data.distributed
+from torchvision import models
+import horovod.torch as hvd
+import timeit
+import numpy as np
+
+# Benchmark settings
+parser = argparse.ArgumentParser(description='PyTorch Synthetic Benchmark',
+                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument('--fp16-allreduce', action='store_true', default=False,
+                    help='use fp16 compression during allreduce')
+
+parser.add_argument('--model', type=str, default='resnet50',
+                    help='model to benchmark')
+parser.add_argument('--batch-size', type=int, default=32,
+                    help='input batch size')
+
+parser.add_argument('--num-warmup-batches', type=int, default=10,
+                    help='number of warm-up batches that don\'t count towards benchmark')
+parser.add_argument('--num-batches-per-iter', type=int, default=10,
+                    help='number of batches per benchmark iteration')
+parser.add_argument('--num-iters', type=int, default=10,
+                    help='number of benchmark iterations')
+
+parser.add_argument('--no-cuda', action='store_true', default=False,
+                    help='disables CUDA training')
+
+parser.add_argument('--use-adasum', action='store_true', default=False,
+                    help='use adasum algorithm to do reduction')
+
+args = parser.parse_args()
+args.cuda = not args.no_cuda and torch.cuda.is_available()
+
+hvd.init()
+
+if args.cuda:
+    # Horovod: pin GPU to local rank.
+    torch.cuda.set_device(hvd.local_rank())
+
+cudnn.benchmark = True
+
+# Set up standard model.
+model = getattr(models, args.model)()
+
+# By default, Adasum doesn't need scaling up learning rate.
+lr_scaler = hvd.size() if not args.use_adasum else 1
+
+if args.cuda:
+    # Move model to GPU.
+    model.cuda()
+    # If using GPU Adasum allreduce, scale learning rate by local_size.
+    if args.use_adasum and hvd.nccl_built():
+        lr_scaler = hvd.local_size()
+
+optimizer = optim.SGD(model.parameters(), lr=0.01 * lr_scaler)
+
+# Horovod: (optional) compression algorithm.
+compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none
+
+# Horovod: wrap optimizer with DistributedOptimizer.
+optimizer = hvd.DistributedOptimizer(optimizer,
+                                     named_parameters=model.named_parameters(),
+                                     compression=compression,
+                                     op=hvd.Adasum if args.use_adasum else hvd.Average)
+
+# Horovod: broadcast parameters & optimizer state.
+hvd.broadcast_parameters(model.state_dict(), root_rank=0)
+hvd.broadcast_optimizer_state(optimizer, root_rank=0)
+
+# Set up fixed fake data
+data = torch.randn(args.batch_size, 3, 224, 224)
+target = torch.LongTensor(args.batch_size).random_() % 1000
+if args.cuda:
+    data, target = data.cuda(), target.cuda()
+
+
+def benchmark_step():
+    optimizer.zero_grad()
+    output = model(data)
+    loss = F.cross_entropy(output, target)
+    loss.backward()
+    optimizer.step()
+
+
+def log(s, nl=True):
+    if hvd.rank() != 0:
+        return
+    print(s, end='\n' if nl else '')
+
+
+log('Model: %s' % args.model)
+log('Batch size: %d' % args.batch_size)
+device = 'GPU' if args.cuda else 'CPU'
+log('Number of %ss: %d' % (device, hvd.size()))
+
+# Warm-up
+log('Running warmup...')
+timeit.timeit(benchmark_step, number=args.num_warmup_batches)
+
+# Benchmark
+log('Running benchmark...')
+img_secs = []
+for x in range(args.num_iters):
+    time = timeit.timeit(benchmark_step, number=args.num_batches_per_iter)
+    img_sec = args.batch_size * args.num_batches_per_iter / time
+    log('Iter #%d: %.1f img/sec per %s' % (x, img_sec, device))
+    img_secs.append(img_sec)
+
+# Results
+img_sec_mean = np.mean(img_secs)
+img_sec_conf = 1.96 * np.std(img_secs)
+log('Img/sec per %s: %.1f +-%.1f' % (device, img_sec_mean, img_sec_conf))
+log('Total img/sec on %d %s(s): %.1f +-%.1f' %
+    (hvd.size(), device, hvd.size() * img_sec_mean, hvd.size() * img_sec_conf))
+

run_mpi.sh is as follows:

+
#!/bin/bash
+MY_HOME=/home/ma-user
+
+MY_SSHD_PORT=${MY_SSHD_PORT:-"36666"}
+
+MY_MPI_BTL_TCP_IF=${MY_MPI_BTL_TCP_IF:-"eth0,bond0"}
+
+MY_TASK_INDEX=${MA_TASK_INDEX:-${VC_TASK_INDEX:-${VK_TASK_INDEX}}}
+
+MY_MPI_SLOTS=${MY_MPI_SLOTS:-"${MA_NUM_GPUS}"}
+
+MY_MPI_TUNE_FILE="${MY_HOME}/env_for_user_process"
+
+if [ -z ${MY_MPI_SLOTS} ]; then
+    echo "[run_mpi] MY_MPI_SLOTS is empty, set it be 1"
+    MY_MPI_SLOTS="1"
+fi
+
+printf "MY_HOME: ${MY_HOME}\nMY_SSHD_PORT: ${MY_SSHD_PORT}\nMY_MPI_BTL_TCP_IF: ${MY_MPI_BTL_TCP_IF}\nMY_TASK_INDEX: ${MY_TASK_INDEX}\nMY_MPI_SLOTS: ${MY_MPI_SLOTS}\n"
+
+env | grep -E '^MA_|^AWS_|^S3_|^PATH|^VC_WORKER_|^SCC|^CRED' | grep -v '=$' > ${MY_MPI_TUNE_FILE}
+# add -x to each line
+sed -i 's/^/-x /' ${MY_MPI_TUNE_FILE}
+
+sed -i "s|{{MY_SSHD_PORT}}|${MY_SSHD_PORT}|g" ${MY_HOME}/etc/ssh/sshd_config
+
+# start sshd service
+bash -c "$(which sshd) -f ${MY_HOME}/etc/ssh/sshd_config"
+
+# confirm the sshd is up
+netstat -anp | grep LIS | grep ${MY_SSHD_PORT}
+
+if [ $MY_TASK_INDEX -eq 0 ]; then
+    # generate the hostfile of mpi
+    for ((i=0; i<$MA_NUM_HOSTS; i++))
+    do
+        eval hostname=${MA_VJ_NAME}-${MA_TASK_NAME}-${i}.${MA_VJ_NAME}
+        echo "[run_mpi] hostname: ${hostname}"
+
+        ip=""
+        while [ -z "$ip" ]; do
+            ip=$(ping -c 1 ${hostname} | grep "PING" | sed -E 's/PING .* .([0-9.]+). .*/\1/g')
+            sleep 1
+        done
+        echo "[run_mpi] resolved ip: ${ip}"
+
+        # test the sshd is up
+        while :
+        do
+            if [ cat < /dev/null >/dev/tcp/${ip}/${MY_SSHD_PORT} ]; then
+                break
+            fi
+            sleep 1
+        done
+
+        echo "[run_mpi] the sshd of ip ${ip} is up"
+
+        echo "${ip} slots=$MY_MPI_SLOTS" >> ${MY_HOME}/hostfile
+    done
+
+    printf "[run_mpi] hostfile:\n`cat ${MY_HOME}/hostfile`\n"
+fi
+
+RET_CODE=0
+
+if [ $MY_TASK_INDEX -eq 0 ]; then
+
+    echo "[run_mpi] start exec command time: "$(date +"%Y-%m-%d-%H:%M:%S")
+
+    np=$(( ${MA_NUM_HOSTS} * ${MY_MPI_SLOTS} ))
+
+    echo "[run_mpi] command: mpirun -np ${np} -hostfile ${MY_HOME}/hostfile -mca plm_rsh_args \"-p ${MY_SSHD_PORT}\" -tune ${MY_MPI_TUNE_FILE} ... $@"
+
+    # execute mpirun at worker-0
+    # mpirun
+    mpirun \
+        -np ${np} \
+        -hostfile ${MY_HOME}/hostfile \
+        -mca plm_rsh_args "-p ${MY_SSHD_PORT}" \
+        -tune ${MY_MPI_TUNE_FILE} \
+        -bind-to none -map-by slot \
+        -x NCCL_DEBUG=INFO -x NCCL_SOCKET_IFNAME=${MY_MPI_BTL_TCP_IF} -x NCCL_SOCKET_FAMILY=AF_INET \
+        -x HOROVOD_MPI_THREADS_DISABLE=1 \
+        -x LD_LIBRARY_PATH \
+        -mca pml ob1 -mca btl ^openib -mca plm_rsh_no_tree_spawn true \
+        "$@"
+
+    RET_CODE=$?
+
+    if [ $RET_CODE -ne 0 ]; then
+        echo "[run_mpi] exec command failed, exited with $RET_CODE"
+    else
+        echo "[run_mpi] exec command successfully, exited with $RET_CODE"
+    fi
+
+    # stop 1...N worker by killing the sleep proc
+    sed -i '1d' ${MY_HOME}/hostfile
+    if [ `cat ${MY_HOME}/hostfile | wc -l` -ne 0 ]; then
+        echo "[run_mpi] stop 1 to (N - 1) worker by killing the sleep proc"
+
+        sed -i 's/${MY_MPI_SLOTS}/1/g' ${MY_HOME}/hostfile
+        printf "[run_mpi] hostfile:\n`cat ${MY_HOME}/hostfile`\n"
+
+        mpirun \
+        --hostfile ${MY_HOME}/hostfile \
+        --mca btl_tcp_if_include ${MY_MPI_BTL_TCP_IF} \
+        --mca plm_rsh_args "-p ${MY_SSHD_PORT}" \
+        -x PATH -x LD_LIBRARY_PATH \
+        pkill sleep \
+        > /dev/null 2>&1
+    fi
+
+    echo "[run_mpi] exit time: "$(date +"%Y-%m-%d-%H:%M:%S")
+else
+    echo "[run_mpi] the training log is in worker-0"
+    sleep 365d
+    echo "[run_mpi] exit time: "$(date +"%Y-%m-%d-%H:%M:%S")
+fi
+
+exit $RET_CODE
+
+

Step 3 Preparing a Server

Obtain a Linux x86_64 server running Ubuntu 18.04. Either an ECS or your local PC will do.

+
For details about how to purchase an ECS, see Purchasing and Logging In to a Linux ECS. When creating the ECS, select an Ubuntu 18.04 public image.
Figure 1 Creating an ECS using a public image (x86)
+
+
+

Step 4 Creating a Custom Image

Create a container image with the following configurations and use the image to create a training job on ModelArts:

+ +

This section describes how to write a Dockerfile to create a custom image.

+
  1. Install Docker.

    The following uses Linux x86_64 as an example to describe how to obtain a Docker installation package. For more details about how to install Docker, see official Docker documents. Run the following command to install Docker:

    +
    curl -fsSL get.docker.com -o get-docker.sh
    +sh get-docker.sh
    +

    If the docker images command can be executed, Docker has been installed. In this case, skip this step.

    +
  2. Check the Docker Engine version. Run the following command:
    docker version | grep -A 1 Engine
    +
    The following information is displayed:
     Engine:
    +  Version:          18.09.0
    +
    +

    Use the Docker engine of the preceding version or later to create a custom image.

    +
    +
  3. Create a folder named context.
    mkdir -p context
    +
  4. Obtain the pip.conf file. In this example, the pip source provided by Huawei Mirrors is used, which is as follows:
    [global]
    +index-url = https://repo.huaweicloud.com/repository/pypi/simple
    +trusted-host = repo.huaweicloud.com
    +timeout = 120
    +

    To obtain pip.conf, switch to Huawei Mirrors https://mirrors.huaweicloud.com/home and search for pypi.

    +
    +
  5. Download the source Horovod code file.

    Download horovod-0.22.1.tar.gz from https://pypi.org/project/horovod/0.22.1/#files.

    +
  6. Download .whl files.

    Download the following .whl files from https://download.pytorch.org/whl/torch_stable.html.

    +
    • torch-1.8.1+cu111-cp37-cp37m-linux_x86_64.whl
    • torchaudio-0.8.1-cp37-cp37m-linux_x86_64.whl
    • torchvision-0.9.1+cu111-cp37-cp37m-linux_x86_64.whl
    +

    The URL code of the plus sign (+) is %2B. When searching for files in the preceding websites, replace the plus sign (+) in the file name with %2B, for example, torch-1.8.1%2Bcu111-cp37-cp37m-linux_x86_64.whl.

    +
    +
  7. Download the Miniconda3 installation file.

    Download Miniconda3-py37_4.12.0-Linux-x86_64.sh from https://repo.anaconda.com/miniconda/Miniconda3-py37_4.12.0-Linux-x86_64.sh.

    +
  8. Write the container image Dockerfile.
    Create an empty file named Dockerfile in the context folder and copy the following content to the file:
    # The server on which the container image is created must access the Internet.
    +
    +# Base container image at https://github.com/NVIDIA/nvidia-docker/wiki/CUDA
    +#
    +# https://docs.docker.com/develop/develop-images/multistage-build/#use-multi-stage-builds
    +# require Docker Engine >= 17.05
    +#
    +# builder stage
    +FROM nvidia/cuda:11.1.1-devel-ubuntu18.04 AS builder
    +
    +# Install CMake obtained from Huawei Mirrors.
    +RUN cp -a /etc/apt/sources.list /etc/apt/sources.list.bak && \
    +    sed -i "s@http://.*archive.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
    +    sed -i "s@http://.*security.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
    +    echo > /etc/apt/apt.conf.d/00skip-verify-peer.conf "Acquire { https::Verify-Peer false }" && \
    +    apt-get update && \
    +    apt-get install -y build-essential cmake g++-7 && \
    +    apt-get clean && \
    +    mv /etc/apt/sources.list.bak /etc/apt/sources.list && \
    +    rm /etc/apt/apt.conf.d/00skip-verify-peer.conf
    +
    +# The default user of the base container image is root.
    +# USER root
    +
    +# Use the PyPI configuration obtained from Huawei Mirrors.
    +RUN mkdir -p /root/.pip/
    +COPY pip.conf /root/.pip/pip.conf
    +
    +# Copy the installation files to the /tmp directory in the base container image.
    +COPY Miniconda3-py37_4.12.0-Linux-x86_64.sh /tmp
    +COPY torch-1.8.1+cu111-cp37-cp37m-linux_x86_64.whl /tmp
    +COPY torchvision-0.9.1+cu111-cp37-cp37m-linux_x86_64.whl /tmp
    +COPY torchaudio-0.8.1-cp37-cp37m-linux_x86_64.whl /tmp
    +COPY openmpi-3.0.0-bin.tar.gz /tmp
    +COPY horovod-0.22.1.tar.gz /tmp
    +
    +# https://conda.io/projects/conda/en/latest/user-guide/install/linux.html#installing-on-linux
    +# Install Miniconda3 in the /home/ma-user/miniconda3 directory of the base container image.
    +RUN bash /tmp/Miniconda3-py37_4.12.0-Linux-x86_64.sh -b -p /home/ma-user/miniconda3
    +
    +# Install the Open MPI 3.0.0 file obtained from Horovod v0.22.1.
    +# https://github.com/horovod/horovod/blob/v0.22.1/docker/horovod/Dockerfile
    +# https://github.com/horovod/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz
    +RUN cd /usr/local && \
    +    tar -zxf /tmp/openmpi-3.0.0-bin.tar.gz && \
    +    ldconfig && \
    +    mpirun --version
    +
    +# Environment variables required for building Horovod with PyTorch
    +ENV HOROVOD_NCCL_INCLUDE=/usr/include \
    +    HOROVOD_NCCL_LIB=/usr/lib/x86_64-linux-gnu \
    +    HOROVOD_MPICXX_SHOW="/usr/local/openmpi/bin/mpicxx -show" \
    +    HOROVOD_GPU_OPERATIONS=NCCL \
    +    HOROVOD_WITH_PYTORCH=1
    +
    +# Install the .whl files using default Miniconda3 Python environment /home/ma-user/miniconda3/bin/pip.
    +RUN cd /tmp && \
    +    /home/ma-user/miniconda3/bin/pip install --no-cache-dir \
    +    /tmp/torch-1.8.1+cu111-cp37-cp37m-linux_x86_64.whl \
    +    /tmp/torchvision-0.9.1+cu111-cp37-cp37m-linux_x86_64.whl \
    +    /tmp/torchaudio-0.8.1-cp37-cp37m-linux_x86_64.whl
    +
    +# Build and install horovod-0.22.1.tar.gz using default Miniconda3 Python environment /home/ma-user/miniconda3/bin/pip.
    +RUN cd /tmp && \
    +    /home/ma-user/miniconda3/bin/pip install --no-cache-dir \
    +    /tmp/horovod-0.22.1.tar.gz
    +
    +# Create the container image.
    +FROM nvidia/cuda:11.1.1-runtime-ubuntu18.04
    +
    +COPY MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz /tmp
    +
    +# Install the vim, cURL, net-tools, MLNX_OFED, and SSH tools obtained from Huawei Mirrors.
    +RUN cp -a /etc/apt/sources.list /etc/apt/sources.list.bak && \
    +    sed -i "s@http://.*archive.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
    +    sed -i "s@http://.*security.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
    +    echo > /etc/apt/apt.conf.d/00skip-verify-peer.conf "Acquire { https::Verify-Peer false }" && \
    +    apt-get update && \
    +    apt-get install -y vim curl net-tools iputils-ping libfile-find-rule-perl-perl \
    +    openssh-client openssh-server && \
    +    ssh -V && \
    +    mkdir -p /run/sshd && \
    +    # mlnx ofed
    +    apt-get install -y python libfuse2 dpatch libnl-3-dev autoconf libnl-route-3-dev pciutils libnuma1 libpci3 m4 libelf1 debhelper automake graphviz bison lsof kmod libusb-1.0-0 swig libmnl0 autotools-dev flex chrpath libltdl-dev && \
    +    cd /tmp && \
    +    tar -xvf MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz && \
    +    MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64/mlnxofedinstall --user-space-only --basic --without-fw-update -q && \
    +    cd - && \
    +    rm -rf /tmp/* && \
    +    apt-get clean && \
    +    mv /etc/apt/sources.list.bak /etc/apt/sources.list && \
    +    rm /etc/apt/apt.conf.d/00skip-verify-peer.conf
    +
    +# Install the Open MPI 3.0.0 file obtained from Horovod v0.22.1.
    +# https://github.com/horovod/horovod/blob/v0.22.1/docker/horovod/Dockerfile
    +# https://github.com/horovod/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz
    +COPY openmpi-3.0.0-bin.tar.gz /tmp
    +RUN cd /usr/local && \
    +    tar -zxf /tmp/openmpi-3.0.0-bin.tar.gz && \
    +    ldconfig && \
    +    mpirun --version
    +
    +# Add user ma-user (UID = 1000, GID = 100).
    +# A user group whose GID is 100 exists in the basic container image. User ma-user can directly run the following command:
    +RUN useradd -m -d /home/ma-user -s /bin/bash -g 100 -u 1000 ma-user
    +
    +# Copy the /home/ma-user/miniconda3 directory from the builder stage to the directory with the same name in the current container image.
    +COPY --chown=ma-user:100 --from=builder /home/ma-user/miniconda3 /home/ma-user/miniconda3
    +
    +# Configure the default user and working directory of the container image.
    +USER ma-user
    +WORKDIR /home/ma-user
    +
    +# Configure sshd to support SSH password-free login.
    +RUN MA_HOME=/home/ma-user && \
    +    # setup sshd dir
    +    mkdir -p ${MA_HOME}/etc && \
    +    ssh-keygen -f ${MA_HOME}/etc/ssh_host_rsa_key -N '' -t rsa  && \
    +    mkdir -p ${MA_HOME}/etc/ssh ${MA_HOME}/var/run  && \
    +    # setup sshd config (listen at {{MY_SSHD_PORT}} port)
    +    echo "Port {{MY_SSHD_PORT}}\n\
    +HostKey ${MA_HOME}/etc/ssh_host_rsa_key\n\
    +AuthorizedKeysFile ${MA_HOME}/.ssh/authorized_keys\n\
    +PidFile ${MA_HOME}/var/run/sshd.pid\n\
    +StrictModes no\n\
    +UsePAM no" > ${MA_HOME}/etc/ssh/sshd_config && \
    +    # generate ssh key
    +    ssh-keygen -t rsa -f ${MA_HOME}/.ssh/id_rsa -P '' && \
    +    cat ${MA_HOME}/.ssh/id_rsa.pub >> ${MA_HOME}/.ssh/authorized_keys && \
    +    # disable ssh host key checking for all hosts
    +    echo "Host *\n\
    +  StrictHostKeyChecking no" > ${MA_HOME}/.ssh/config
    +
    +# Configure the preset environment variables of the container image.
    +# Set PYTHONUNBUFFERED to 1 to prevent log loss.
    +ENV PATH=/home/ma-user/miniconda3/bin:$PATH \
    +    PYTHONUNBUFFERED=1
    +
    +

    For details about how to write a Dockerfile, see official Docker documents.

    +
  9. Download MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz.

    Go to https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/, click Download, set Version to 5.4-3.5.8.0-LTS, OS Distribution Version to Ubuntu 18.04, and Architecture to x86_64, and download MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz.

    +
  10. Download openmpi-3.0.0-bin.tar.gz.

    Download openmpi-3.0.0-bin.tar.gz from https://github.com/horovod/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz.

    +
  11. Store the pip source file, .whl files, and Miniconda3 installation file in the context folder, which is as follows:
    context
    +├── Dockerfile
    +├── MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz
    +├── Miniconda3-py37_4.12.0-Linux-x86_64.sh
    +├── horovod-0.22.1.tar.gz
    +├── openmpi-3.0.0-bin.tar.gz
    +├── pip.conf
    +├── torch-1.8.1+cu111-cp37-cp37m-linux_x86_64.whl
    +├── torchaudio-0.8.1-cp37-cp37m-linux_x86_64.whl
    +└── torchvision-0.9.1+cu111-cp37-cp37m-linux_x86_64.whl
    +
  12. Create the container image. Run the following command in the directory where the Dockerfile is stored to build the container image horovod-pytorch:0.22.1-1.8.1-ofed-cuda11.1:
    1
    docker build . -t horovod-pytorch:0.22.1-1.8.1-ofed-cuda11.1
    +
    + +
    +
    The following log shows that the image has been created.
    Successfully tagged horovod-pytorch:0.22.1-1.8.1-ofed-cuda11.1
    +
    +
+
+

Step 5 Uploading the Image to SWR

  1. Log in to the SWR console and select the target region.
    Figure 2 SWR console
    +
  2. Click Create Organization in the upper right corner and enter an organization name to create an organization. Customize the organization name. Replace the organization name deep-learning in subsequent commands with the actual organization name.
    Figure 3 Creating an organization
    +
  3. Click Generate Login Command in the upper right corner to obtain a login command.
    Figure 4 Login Command
    +
  4. Log in to the local environment as the root user and enter the login command.
  5. Upload the image to SWR.
    1. Tag the uploaded image.
      # Replace the region, domain, as well as organization name deep-learning with the actual values.
      +sudo docker tag horovod-pytorch:0.22.1-1.8.1-ofed-cuda11.1 swr.{region-id}.{domain}/deep-learning/horovod-pytorch:0.22.1-1.8.1-ofed-cuda11.1
      +
    2. Run the following command to upload the image:
      # Replace the region, domain, as well as organization name deep-learning with the actual values.
      +sudo docker push swr.{region-id}.{domain}/deep-learning/horovod-pytorch:0.22.1-1.8.1-ofed-cuda11.1
      +
    +
  6. After the image is uploaded, choose My Images in the navigation pane on the left of the SWR console to view the uploaded custom images.
+
+

Step 6 Creating a Training Job on ModelArts

  1. Log in to the ModelArts management console, check whether access authorization has been configured for your account. For details, see Configuring Agency Authorization. If you have been authorized using access keys, clear the authorization and configure agency authorization.
  2. In the navigation pane, choose Training Management > Training Jobs. The training job list is displayed by default.
  3. Click Create Training Job. On the page that is displayed, configure parameters and click Next.
    • Created By: Custom algorithms
    • Boot Mode: Custom images
    • Image path: image created in Step 5 Uploading the Image to SWR.
    • Code Directory: directory where the boot script file is stored in OBS, for example, obs://test-modelarts/pytorch/demo-code/. The training code is automatically downloaded to the ${MA_JOB_DIR}/demo-code directory of the training container. demo-code (customizable) is the last-level directory of the OBS path.
    • Boot Command: bash ${MA_JOB_DIR}/demo-code/run_mpi.sh python ${MA_JOB_DIR}/demo-code/pytorch_synthetic_benchmark.py. demo-code (customizable) is the last-level directory of the OBS path.
    • Environment Variable: Click Add Environment Variable and add the environment variable MY_SSHD_PORT=38888.
    • Resource Pool: Select Public resource pools.
    • Resource Type: Select GPU.
    • Compute Nodes: 1 or 2
    • Persistent Log Saving: enabled
    • Job Log Path: OBS path for storing training logs, for example, obs://test-modelarts/pytorch/log/
    +
  4. Confirm the configurations of the training job and click Submit.
  5. Wait until the training job is created.

    After you submit the job creation request, the system will automatically perform operations on the backend, such as downloading the container image and code directory and running the boot command. A training job requires a certain period of time for running. The duration ranges from dozens of minutes to several hours, varying depending on the service logic and selected resources. After the training job is executed, the log similar to the following is output.

    +
    Figure 5 Run logs of training jobs with GPU specifications (one compute node)
    +
    Figure 6 Run logs of training jobs with GPU specifications (two compute nodes)
    +
+
+
+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/develop-modelarts-0144.html b/docs/modelarts/best-practice/develop-modelarts-0144.html new file mode 100644 index 00000000..551a7482 --- /dev/null +++ b/docs/modelarts/best-practice/develop-modelarts-0144.html @@ -0,0 +1,350 @@ + + +

Example: Creating a Custom Image for Training (MindSpore and GPUs)

+

This section describes how to create an image and use it for training on ModelArts. The AI engine used in the image is MindSpore, and the resources used for training are GPUs.

+

This section applies only to training jobs of the new version.

+
+

Scenario

In this example, write a Dockerfile to create a custom image on a Linux x86_64 server running Ubuntu 18.04.

+

Create a container image with the following configurations and use the image to create a GPU-powered training job on ModelArts:

+ +
+

Procedure

Before using a custom image to create a training job, you need to be familiar with Docker and have development experience.

+ +
+

Prerequisites

You have registered a Huawei Cloud account. The account is not in arrears or frozen.

+
+

Step 1 Creating an OBS Bucket and Folder

Create a bucket and folders in OBS for storing the sample dataset and training code. Table 1 lists the folders to be created. Replace the bucket name and folder names in the example with actual names.

+

For details, see Creating a Bucket and Creating a Folder.

+

Ensure that the OBS bucket and ModelArts are in the same region.

+ +
+ + + + + + + + + + + + + + + + +
Table 1 Required OBS folders

Folder

+

Description

+

obs://test-modelarts/mindspore-gpu/resnet/

+

Stores the training script.

+

obs://test-modelarts/mindspore-gpu/cifar-10-batches-bin/

+

Stores dataset files.

+

obs://test-modelarts/mindspore-gpu/output/

+

Stores training output files.

+

obs://test-modelarts/mindspore-gpu/log/

+

Stores training log files.

+
+
+
+

Step 2 Creating a Dataset and Uploading It to OBS

Go to http://www.cs.toronto.edu/~kriz/cifar.html, download CIFAR-10 binary version (suitable for C programs), decompress it, and upload the decompressed data to obs://test-modelarts/mindspore-gpu/cifar-10-batches-bin/ in the OBS bucket, which is as follows.

+
Figure 1 Datasets
+
+

Step 3 Preparing the Training Script and Uploading It to OBS

Obtain the ResNet file and script run_mpi.sh and upload them to obs://test-modelarts/mindspore-gpu/resnet/ in the OBS bucket.

+

Download the ResNet file from https://gitee.com/mindspore/models/tree/r1.8/official/cv/resnet.

+

run_mpi.sh is as follows:

+
#!/bin/bash
+MY_HOME=/home/ma-user
+
+MY_SSHD_PORT=${MY_SSHD_PORT:-"36666"}
+
+MY_MPI_BTL_TCP_IF=${MY_MPI_BTL_TCP_IF:-"eth0,bond0"}
+
+MY_TASK_INDEX=${MA_TASK_INDEX:-${VC_TASK_INDEX:-${VK_TASK_INDEX}}}
+
+MY_MPI_SLOTS=${MY_MPI_SLOTS:-"${MA_NUM_GPUS}"}
+
+MY_MPI_TUNE_FILE="${MY_HOME}/env_for_user_process"
+
+if [ -z ${MY_MPI_SLOTS} ]; then
+    echo "[run_mpi] MY_MPI_SLOTS is empty, set it be 1"
+    MY_MPI_SLOTS="1"
+fi
+
+printf "MY_HOME: ${MY_HOME}\nMY_SSHD_PORT: ${MY_SSHD_PORT}\nMY_MPI_BTL_TCP_IF: ${MY_MPI_BTL_TCP_IF}\nMY_TASK_INDEX: ${MY_TASK_INDEX}\nMY_MPI_SLOTS: ${MY_MPI_SLOTS}\n"
+
+env | grep -E '^MA_|^AWS_|^S3_|^PATH|^VC_WORKER_|^SCC|^CRED' | grep -v '=$' > ${MY_MPI_TUNE_FILE}
+# add -x to each line
+sed -i 's/^/-x /' ${MY_MPI_TUNE_FILE}
+
+sed -i "s|{{MY_SSHD_PORT}}|${MY_SSHD_PORT}|g" ${MY_HOME}/etc/ssh/sshd_config
+
+# start sshd service
+bash -c "$(which sshd) -f ${MY_HOME}/etc/ssh/sshd_config"
+
+# confirm the sshd is up
+netstat -anp | grep LIS | grep ${MY_SSHD_PORT}
+
+if [ $MY_TASK_INDEX -eq 0 ]; then
+    # generate the hostfile of mpi
+    for ((i=0; i<$MA_NUM_HOSTS; i++))
+    do
+        eval hostname=${MA_VJ_NAME}-${MA_TASK_NAME}-${i}.${MA_VJ_NAME}
+        echo "[run_mpi] hostname: ${hostname}"
+
+        ip=""
+        while [ -z "$ip" ]; do
+            ip=$(ping -c 1 ${hostname} | grep "PING" | sed -E 's/PING .* .([0-9.]+). .*/\1/g')
+            sleep 1
+        done
+        echo "[run_mpi] resolved ip: ${ip}"
+
+        # test the sshd is up
+        while :
+        do
+            if [ cat < /dev/null >/dev/tcp/${ip}/${MY_SSHD_PORT} ]; then
+                break
+            fi
+            sleep 1
+        done
+
+        echo "[run_mpi] the sshd of ip ${ip} is up"
+
+        echo "${ip} slots=$MY_MPI_SLOTS" >> ${MY_HOME}/hostfile
+    done
+
+    printf "[run_mpi] hostfile:\n`cat ${MY_HOME}/hostfile`\n"
+fi
+
+RET_CODE=0
+
+if [ $MY_TASK_INDEX -eq 0 ]; then
+
+    echo "[run_mpi] start exec command time: "$(date +"%Y-%m-%d-%H:%M:%S")
+
+    np=$(( ${MA_NUM_HOSTS} * ${MY_MPI_SLOTS} ))
+
+    echo "[run_mpi] command: mpirun -np ${np} -hostfile ${MY_HOME}/hostfile -mca plm_rsh_args \"-p ${MY_SSHD_PORT}\" -tune ${MY_MPI_TUNE_FILE} ... $@"
+
+    # execute mpirun at worker-0
+    # mpirun
+    mpirun \
+        -np ${np} \
+        -hostfile ${MY_HOME}/hostfile \
+        -mca plm_rsh_args "-p ${MY_SSHD_PORT}" \
+        -tune ${MY_MPI_TUNE_FILE} \
+        -bind-to none -map-by slot \
+        -x NCCL_DEBUG=INFO -x NCCL_SOCKET_IFNAME=${MY_MPI_BTL_TCP_IF} -x NCCL_SOCKET_FAMILY=AF_INET \
+        -x HOROVOD_MPI_THREADS_DISABLE=1 \
+        -x LD_LIBRARY_PATH \
+        -mca pml ob1 -mca btl ^openib -mca plm_rsh_no_tree_spawn true \
+        "$@"
+
+    RET_CODE=$?
+
+    if [ $RET_CODE -ne 0 ]; then
+        echo "[run_mpi] exec command failed, exited with $RET_CODE"
+    else
+        echo "[run_mpi] exec command successfully, exited with $RET_CODE"
+    fi
+
+    # stop 1...N worker by killing the sleep proc
+    sed -i '1d' ${MY_HOME}/hostfile
+    if [ `cat ${MY_HOME}/hostfile | wc -l` -ne 0 ]; then
+        echo "[run_mpi] stop 1 to (N - 1) worker by killing the sleep proc"
+
+        sed -i 's/${MY_MPI_SLOTS}/1/g' ${MY_HOME}/hostfile
+        printf "[run_mpi] hostfile:\n`cat ${MY_HOME}/hostfile`\n"
+
+        mpirun \
+        --hostfile ${MY_HOME}/hostfile \
+        --mca btl_tcp_if_include ${MY_MPI_BTL_TCP_IF} \
+        --mca plm_rsh_args "-p ${MY_SSHD_PORT}" \
+        -x PATH -x LD_LIBRARY_PATH \
+        pkill sleep \
+        > /dev/null 2>&1
+    fi
+
+    echo "[run_mpi] exit time: "$(date +"%Y-%m-%d-%H:%M:%S")
+else
+    echo "[run_mpi] the training log is in worker-0"
+    sleep 365d
+    echo "[run_mpi] exit time: "$(date +"%Y-%m-%d-%H:%M:%S")
+fi
+
+exit $RET_CODE
+

The following figure shows obs://test-modelarts/mindspore-gpu/resnet/, including the ResNet file and run_mpi.sh.

+
Figure 2 ResNet file and run_mpi.sh
+
+

Step 4 Preparing a Server

Obtain a Linux x86_64 server running Ubuntu 18.04. Either an ECS or your local PC will do.

+
For details about how to purchase an ECS, see Purchasing and Logging In to a Linux ECS. When creating the ECS, select an Ubuntu 18.04 public image.
Figure 3 Creating an ECS using a public image (x86)
+
+
+

Step 5 Creating a Custom Image

Create a container image with the following configurations and use the image to create a training job on ModelArts:

+ +

This section describes how to write a Dockerfile to create a custom image.

+
  1. Install Docker.

    The following uses Linux x86_64 as an example to describe how to obtain a Docker installation package. For more details about how to install Docker, see official Docker documents. Run the following command to install Docker:

    +
    curl -fsSL get.docker.com -o get-docker.sh
    +sh get-docker.sh
    +

    If the docker images command can be executed, Docker has been installed. In this case, skip this step.

    +
  2. Check the Docker Engine version. Run the following command:
    docker version | grep -A 1 Engine
    +
    The following information is displayed:
     Engine:
    +  Version:          18.09.0
    +
    +

    Use the Docker engine of the preceding version or later to create a custom image.

    +
    +
  3. Create a folder named context.
    mkdir -p context
    +
  4. Obtain the pip.conf file. In this example, the pip source provided by Huawei Mirrors is used, which is as follows:
    [global]
    +index-url = https://repo.huaweicloud.com/repository/pypi/simple
    +trusted-host = repo.huaweicloud.com
    +timeout = 120
    +

    To obtain pip.conf, switch to Huawei Mirrors https://mirrors.huaweicloud.com/home and search for pypi.

    +
    +
  5. Download mindspore_gpu-1.8.1-cp37-cp37m-linux_x86_64.whl from https://ms-release.obs.cn-north-4.myhuaweicloud.com/1.8.1/MindSpore/gpu/x86_64/cuda-11.1/mindspore_gpu-1.8.1-cp37-cp37m-linux_x86_64.whl.
  6. Download the Miniconda3 installation file.

    Download Miniconda3-py37_4.12.0-Linux-x86_64.sh from https://repo.anaconda.com/miniconda/Miniconda3-py37_4.12.0-Linux-x86_64.sh.

    +
  7. Write the container image Dockerfile.
    Create an empty file named Dockerfile in the context folder and copy the following content to the file:
    # The server on which the container image is created must access the Internet.
    +
    +# Base container image at https://github.com/NVIDIA/nvidia-docker/wiki/CUDA
    +#
    +# https://docs.docker.com/develop/develop-images/multistage-build/#use-multi-stage-builds
    +# require Docker Engine >= 17.05
    +#
    +# builder stage
    +FROM nvidia/cuda:11.1.1-devel-ubuntu18.04 AS builder
    +
    +# The default user of the base container image is root.
    +# USER root
    +
    +# Use the PyPI configuration obtained from Huawei Mirrors.
    +RUN mkdir -p /root/.pip/
    +COPY pip.conf /root/.pip/pip.conf
    +
    +# Copy the installation files to the /tmp directory in the base container image.
    +COPY Miniconda3-py37_4.12.0-Linux-x86_64.sh /tmp
    +COPY mindspore_gpu-1.8.1-cp37-cp37m-linux_x86_64.whl /tmp
    +
    +# https://conda.io/projects/conda/en/latest/user-guide/install/linux.html#installing-on-linux
    +# Install Miniconda3 in the /home/ma-user/miniconda3 directory of the base container image.
    +RUN bash /tmp/Miniconda3-py37_4.12.0-Linux-x86_64.sh -b -p /home/ma-user/miniconda3
    +
    +# Install the whl file using default Miniconda3 Python environment /home/ma-user/miniconda3/bin/pip.
    +RUN cd /tmp && \
    +    /home/ma-user/miniconda3/bin/pip install --no-cache-dir \
    +    /tmp/mindspore_gpu-1.8.1-cp37-cp37m-linux_x86_64.whl \
    +    easydict PyYAML
    +
    +# Create the container image.
    +FROM nvidia/cuda:11.1.1-cudnn8-runtime-ubuntu18.04
    +
    +COPY MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz /tmp
    +
    +# Install the vim, cURL, net-tools, MLNX_OFED, and SSH tools obtained from Huawei Mirrors.
    +RUN cp -a /etc/apt/sources.list /etc/apt/sources.list.bak && \
    +    sed -i "s@http://.*archive.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
    +    sed -i "s@http://.*security.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
    +    echo > /etc/apt/apt.conf.d/00skip-verify-peer.conf "Acquire { https::Verify-Peer false }" && \
    +    apt-get update && \
    +    apt-get install -y vim curl net-tools iputils-ping libfile-find-rule-perl-perl \
    +    openssh-client openssh-server && \
    +    ssh -V && \
    +    mkdir -p /run/sshd && \
    +    # mlnx ofed
    +    apt-get install -y python libfuse2 dpatch libnl-3-dev autoconf libnl-route-3-dev pciutils libnuma1 libpci3 m4 libelf1 debhelper automake graphviz bison lsof kmod libusb-1.0-0 swig libmnl0 autotools-dev flex chrpath libltdl-dev && \
    +    cd /tmp && \
    +    tar -xvf MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz && \
    +    MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64/mlnxofedinstall --user-space-only --basic --without-fw-update -q && \
    +    cd - && \
    +    rm -rf /tmp/* && \
    +    apt-get clean && \
    +    mv /etc/apt/sources.list.bak /etc/apt/sources.list && \
    +    rm /etc/apt/apt.conf.d/00skip-verify-peer.conf
    +
    +# Install the Open MPI 3.0.0 file obtained from Horovod v0.22.1.
    +# https://github.com/horovod/horovod/blob/v0.22.1/docker/horovod/Dockerfile
    +# https://github.com/horovod/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz
    +COPY openmpi-3.0.0-bin.tar.gz /tmp
    +RUN cd /usr/local && \
    +    tar -zxf /tmp/openmpi-3.0.0-bin.tar.gz && \
    +    ldconfig && \
    +    mpirun --version
    +
    +# Add user ma-user (UID = 1000, GID = 100).
    +# A user group whose GID is 100 exists in the basic container image. User ma-user can directly run the following command:
    +RUN useradd -m -d /home/ma-user -s /bin/bash -g 100 -u 1000 ma-user
    +
    +# Copy the /home/ma-user/miniconda3 directory from the builder stage to the directory with the same name in the current container image.
    +COPY --chown=ma-user:100 --from=builder /home/ma-user/miniconda3 /home/ma-user/miniconda3
    +
    +# Configure the default user and working directory of the container image.
    +USER ma-user
    +WORKDIR /home/ma-user
    +
    +# Configure sshd to support SSH password-free login.
    +RUN MA_HOME=/home/ma-user && \
    +    # setup sshd dir
    +    mkdir -p ${MA_HOME}/etc && \
    +    ssh-keygen -f ${MA_HOME}/etc/ssh_host_rsa_key -N '' -t rsa  && \
    +    mkdir -p ${MA_HOME}/etc/ssh ${MA_HOME}/var/run  && \
    +    # setup sshd config (listen at {{MY_SSHD_PORT}} port)
    +    echo "Port {{MY_SSHD_PORT}}\n\
    +HostKey ${MA_HOME}/etc/ssh_host_rsa_key\n\
    +AuthorizedKeysFile ${MA_HOME}/.ssh/authorized_keys\n\
    +PidFile ${MA_HOME}/var/run/sshd.pid\n\
    +StrictModes no\n\
    +UsePAM no" > ${MA_HOME}/etc/ssh/sshd_config && \
    +    # generate ssh key
    +    ssh-keygen -t rsa -f ${MA_HOME}/.ssh/id_rsa -P '' && \
    +    cat ${MA_HOME}/.ssh/id_rsa.pub >> ${MA_HOME}/.ssh/authorized_keys && \
    +    # disable ssh host key checking for all hosts
    +    echo "Host *\n\
    +  StrictHostKeyChecking no" > ${MA_HOME}/.ssh/config
    +
    +# Configure the preset environment variables of the container image.
    +# Set PYTHONUNBUFFERED to 1 to prevent log loss.
    +ENV PATH=/home/ma-user/miniconda3/bin:$PATH \
    +    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH \
    +    PYTHONUNBUFFERED=1
    +
    +

    For details about how to write a Dockerfile, see official Docker documents.

    +
  8. Download MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz.

    Go to https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/, click Download, set Version to 5.4-3.5.8.0-LTS, OS Distribution Version to Ubuntu 18.04, and Architecture to x86_64, and download MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz.

    +
  9. Download openmpi-3.0.0-bin.tar.gz.

    Download openmpi-3.0.0-bin.tar.gz from https://github.com/horovod/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz.

    +
  10. Store the Dockerfile and Miniconda3 installation file in the context folder, which is as follows:
    context
    +├── Dockerfile
    +├── MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz
    +├── Miniconda3-py37_4.12.0-Linux-x86_64.sh
    +├── mindspore_gpu-1.8.1-cp37-cp37m-linux_x86_64.whl
    +├── openmpi-3.0.0-bin.tar.gz
    +└── pip.conf
    +
  11. Create the container image. Run the following command in the directory where the Dockerfile is stored to build the container image mindspore:1.8.1-ofed-cuda11.1:
    1
    docker build . -t mindspore:1.8.1-ofed-cuda11.1
    +
    + +
    +
    The following log shows that the image has been created.
    Successfully tagged mindspore:1.8.1-ofed-cuda11.1
    +
    +
+
+

Step 6 Uploading the Image to SWR

  1. Log in to the SWR console and select the target region.
    Figure 4 SWR console
    +
  2. Click Create Organization in the upper right corner and enter an organization name to create an organization. Customize the organization name. Replace the organization name deep-learning in subsequent commands with the actual organization name.
    Figure 5 Creating an organization
    +
  3. Click Generate Login Command in the upper right corner to obtain a login command.
    Figure 6 Login Command
    +
  4. Log in to the local environment as the root user and enter the login command.
  5. Upload the image to SWR.
    1. Tag the uploaded image.
      # Replace the region, domain, as well as organization name deep-learning with the actual values.
      +sudo docker tag mindspore:1.8.1-ofed-cuda11.1 swr.{region-id}.{domain}/deep-learning/mindspore:1.8.1-ofed-cuda11.1
      +
    2. Run the following command to upload the image:
      # Replace the region, domain, as well as organization name deep-learning with the actual values.
      +sudo docker push swr.{region-id}.{domain}/deep-learning/mindspore:1.8.1-ofed-cuda11.1
      +
    +
  6. After the image is uploaded, choose My Images in the navigation pane on the left of the SWR console to view the uploaded custom images.
+
+

Step 7 Creating a Training Job on ModelArts

  1. Log in to the ModelArts management console, check whether access authorization has been configured for your account. For details, see Configuring Agency Authorization. If you have been authorized using access keys, clear the authorization and configure agency authorization.
  2. In the navigation pane, choose Training Management > Training Jobs. The training job list is displayed by default.
  3. Click Create Training Job. On the page that is displayed, configure parameters and click Next.
    • Created By: Custom algorithms
    • Boot Mode: Custom images
    • Image path: image created in Step 6 Uploading the Image to SWR.
    • Code Directory: directory where the boot script file is stored in OBS, for example, obs://test-modelarts/mindspore-gpu/resnet/. The training code is automatically downloaded to the ${MA_JOB_DIR}/resnet directory of the training container. resnet (customizable) is the last-level directory of the OBS path.
    • Boot Command: bash ${MA_JOB_DIR}/resnet/run_mpi.sh python ${MA_JOB_DIR}/resnet/train.py. resnet (customizable) is the last-level directory of the OBS path.
    • Training Input: Click Add Training Input. Enter data_path for the name, select the OBS path to the target dataset, for example, obs://test-modelarts/mindspore-gpu/cifar-10-batches-bin/, and set Obtained from to Hyperparameters.
    • Training Output: Click Add Training Output. Enter output_path for the name, select an OBS path for storing training outputs, for example, obs://test-modelarts/mindspore-gpu/output/, and set Obtained from to Hyperparameters and Predownload to No.
    • Hyperparameters: Click Add Hyperparameter and add the following hyperparameters:
      • run_distribute=True
      • device_num=1 (Set this parameter based on the number of GPUs in the instance flavors.)
      • device_target=GPU
      • epoch_size=2
      +
    • Environment Variable: Click Add Environment Variable and add the environment variable MY_SSHD_PORT=38888.
    • Resource Pool: Select Public resource pools.
    • Resource Type: Select GPU.
    • Compute Nodes: 1 or 2
    • Persistent Log Saving: enabled
    • Job Log Path: OBS path for storing training logs, for example, obs://test-modelarts/mindspore-gpu/log/
    +
  4. Confirm the configurations of the training job and click Submit.
  5. Wait until the training job is created.

    After you submit the job creation request, the system will automatically perform operations on the backend, such as downloading the container image and code directory and running the boot command. A training job requires a certain period of time for running. The duration ranges from dozens of minutes to several hours, varying depending on the service logic and selected resources. After the training job is executed, a log similar to the following is output.

    +
    Figure 7 Run logs of training jobs with GPU specifications (one compute node)
    +
    Figure 8 Run logs of training jobs with GPU specifications (two compute nodes)
    +
+
+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/develop-modelarts-0145.html b/docs/modelarts/best-practice/develop-modelarts-0145.html new file mode 100644 index 00000000..d836beb4 --- /dev/null +++ b/docs/modelarts/best-practice/develop-modelarts-0145.html @@ -0,0 +1,215 @@ + + +

Example: Creating a Custom Image for Training (TensorFlow and GPUs)

+

This section describes how to create an image and use it for training on ModelArts. The AI engine used in the image is TensorFlow, and the resources used for training are GPUs.

+

This section applies only to training jobs of the new version.

+
+

Scenario

In this example, write a Dockerfile to create a custom image on a Linux x86_64 server running Ubuntu 18.04.

+

Create a container image with the following configurations and use the image to create a GPU-powered training job on ModelArts:

+ +
+

Procedure

Before using a custom image to create a training job, you need to be familiar with Docker and have development experience.

+
  1. Prerequisites
  2. Step 1 Creating an OBS Bucket and Folder
  3. Step 2 Creating a Dataset and Uploading It to OBS
  4. Step 3 Preparing the Training Script and Uploading It to OBS
  5. Step 4 Preparing a Server
  6. Step 5 Creating a Custom Image
  7. Step 6 Uploading the Image to SWR
  8. Step 7 Creating a Training Job on ModelArts
+
+

Prerequisites

You have registered a Huawei Cloud account. The account is not in arrears or frozen.

+
+

Step 1 Creating an OBS Bucket and Folder

Create a bucket and folders in OBS for storing the sample dataset and training code. Table 1 lists the folders to be created. Replace the bucket name and folder names in the example with actual names.

+

For details, see Creating a Bucket and Creating a Folder.

+

Ensure that OBS and ModelArts are in the same region.

+ +
+ + + + + + + + + + + + + +
Table 1 Required OBS folders

Folder

+

Description

+

obs://test-modelarts/tensorflow/code/

+

Stores the training script.

+

obs://test-modelarts/tensorflow/data/

+

Stores dataset files.

+

obs://test-modelarts/tensorflow/log/

+

Stores training log files.

+
+
+
+

Step 2 Creating a Dataset and Uploading It to OBS

Download mnist.npz from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz, and upload it to obs://test-modelarts/tensorflow/data/ in the OBS bucket.

+
+

Step 3 Preparing the Training Script and Uploading It to OBS

Obtain the training script mnist.py and upload it to obs://test-modelarts/tensorflow/code/ in the OBS bucket.

+

mnist.py is as follows:

+
import argparse
+import tensorflow as tf
+
+parser = argparse.ArgumentParser(description='TensorFlow quick start')
+parser.add_argument('--data_url', type=str, default="./Data", help='path where the dataset is saved')
+args = parser.parse_args()
+
+mnist = tf.keras.datasets.mnist
+
+(x_train, y_train), (x_test, y_test) = mnist.load_data(args.data_url)
+x_train, x_test = x_train / 255.0, x_test / 255.0
+
+model = tf.keras.models.Sequential([
+  tf.keras.layers.Flatten(input_shape=(28, 28)),
+  tf.keras.layers.Dense(128, activation='relu'),
+  tf.keras.layers.Dropout(0.2),
+  tf.keras.layers.Dense(10)
+])
+
+loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+
+model.compile(optimizer='adam',
+              loss=loss_fn,
+              metrics=['accuracy'])
+
+model.fit(x_train, y_train, epochs=5)
+
+

Step 4 Preparing a Server

Obtain a Linux x86_64 server running Ubuntu 18.04. Either an ECS or your local PC will do.

+
For details about how to purchase an ECS, see Purchasing and Logging In to a Linux ECS. When creating the ECS, select an Ubuntu 18.04 public image.
Figure 1 Creating an ECS using a public image (x86)
+
+
+

Step 5 Creating a Custom Image

Create a container image with the following configurations and use the image to create a training job on ModelArts:

+ +

This section describes how to write a Dockerfile to create a custom image.

+
  1. Install Docker.

    The following uses Linux x86_64 as an example to describe how to obtain a Docker installation package. For more details about how to install Docker, see official Docker documents. Run the following command to install Docker:

    +
    curl -fsSL get.docker.com -o get-docker.sh
    +sh get-docker.sh
    +

    If the docker images command can be executed, Docker has been installed. In this case, skip this step.

    +
  2. Check the Docker Engine version. Run the following command:
    docker version | grep -A 1 Engine
    +
    The following information is displayed:
     Engine:
    +  Version:          18.09.0
    +
    +

    Use the Docker engine of the preceding version or later to create a custom image.

    +
    +
  3. Create a folder named context.
    mkdir -p context
    +
  4. Obtain the pip.conf file. In this example, the pip source provided by Huawei Mirrors is used, which is as follows:
    [global]
    +index-url = https://repo.huaweicloud.com/repository/pypi/simple
    +trusted-host = repo.huaweicloud.com
    +timeout = 120
    +

    To obtain pip.conf, switch to Huawei Mirrors https://mirrors.huaweicloud.com/home and search for pypi.

    +
    +
  5. Download tensorflow_gpu-2.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.

    Download tensorflow_gpu-2.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl from https://pypi.org/project/tensorflow-gpu/2.10.0/#files.

    +
  6. Download the Miniconda3 installation file.

    Download Miniconda3-py37_4.12.0-Linux-x86_64.sh from https://repo.anaconda.com/miniconda/Miniconda3-py37_4.12.0-Linux-x86_64.sh.

    +
  7. Write the container image Dockerfile.
    Create an empty file named Dockerfile in the context folder and copy the following content to the file:
    # The server on which the container image is created must access the Internet.
    +
    +# Base container image at https://github.com/NVIDIA/nvidia-docker/wiki/CUDA
    +#
    +# https://docs.docker.com/develop/develop-images/multistage-build/#use-multi-stage-builds
    +# require Docker Engine >= 17.05
    +#
    +# builder stage
    +FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu18.04 AS builder
    +
    +# The default user of the base container image is root.
    +# USER root
    +
    +# Use the PyPI configuration obtained from Huawei Mirrors.
    +RUN mkdir -p /root/.pip/
    +COPY pip.conf /root/.pip/pip.conf
    +
    +# Copy the installation files to the /tmp directory in the base container image.
    +COPY Miniconda3-py37_4.12.0-Linux-x86_64.sh /tmp
    +COPY tensorflow_gpu-2.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl /tmp
    +
    +# https://conda.io/projects/conda/en/latest/user-guide/install/linux.html#installing-on-linux
    +# Install Miniconda3 in the /home/ma-user/miniconda3 directory of the base container image.
    +RUN bash /tmp/Miniconda3-py37_4.12.0-Linux-x86_64.sh -b -p /home/ma-user/miniconda3
    +
    +# Install the whl file using default Miniconda3 Python environment /home/ma-user/miniconda3/bin/pip.
    +RUN cd /tmp && \
    +    /home/ma-user/miniconda3/bin/pip install --no-cache-dir \
    +    /tmp/tensorflow_gpu-2.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    +
    +RUN cd /tmp && \
    +    /home/ma-user/miniconda3/bin/pip install --no-cache-dir keras==2.10.0
    +
    +# Create the container image.
    +FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu18.04
    +
    +COPY MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz /tmp
    +
    +# Install the vim, cURL, net-tools, and MLNX_OFED tools obtained from Huawei Mirrors.
    +RUN cp -a /etc/apt/sources.list /etc/apt/sources.list.bak && \
    +    sed -i "s@http://.*archive.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
    +    sed -i "s@http://.*security.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
    +    echo > /etc/apt/apt.conf.d/00skip-verify-peer.conf "Acquire { https::Verify-Peer false }" && \
    +    apt-get update && \
    +    apt-get install -y vim curl net-tools iputils-ping && \
    +    # mlnx ofed
    +    apt-get install -y python libfuse2 dpatch libnl-3-dev autoconf libnl-route-3-dev pciutils libnuma1 libpci3 m4 libelf1 debhelper automake graphviz bison lsof kmod libusb-1.0-0 swig libmnl0 autotools-dev flex chrpath libltdl-dev && \
    +    cd /tmp && \
    +    tar -xvf MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz && \
    +    MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64/mlnxofedinstall --user-space-only --basic --without-fw-update -q && \
    +    cd - && \
    +    rm -rf /tmp/* && \
    +    apt-get clean && \
    +    mv /etc/apt/sources.list.bak /etc/apt/sources.list && \
    +    rm /etc/apt/apt.conf.d/00skip-verify-peer.conf
    +
    +# Add user ma-user (UID = 1000, GID = 100).
    +# A user group whose GID is 100 exists in the basic container image. User ma-user can directly run the following command:
    +RUN useradd -m -d /home/ma-user -s /bin/bash -g 100 -u 1000 ma-user
    +
    +# Copy the /home/ma-user/miniconda3 directory from the builder stage to the directory with the same name in the current container image.
    +COPY --chown=ma-user:100 --from=builder /home/ma-user/miniconda3 /home/ma-user/miniconda3
    +
    +# Configure the default user and working directory of the container image.
    +USER ma-user
    +WORKDIR /home/ma-user
    +
    +# Configure the preset environment variables of the container image.
    +# Set PYTHONUNBUFFERED to 1 to prevent log loss.
    +ENV PATH=/home/ma-user/miniconda3/bin:$PATH \
    +    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH \
    +    PYTHONUNBUFFERED=1
    +
    +

    For details about how to write a Dockerfile, see official Docker documents.

    +
  8. Download MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz.

    Go to https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/, click Download, set Version to 5.4-3.5.8.0-LTS, OS Distribution Version to Ubuntu 18.04, and Architecture to x86_64, and download MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz.

    +
  9. Store the Dockerfile and Miniconda3 installation file in the context folder, which is as follows:
    context
    +├── Dockerfile
    +├── MLNX_OFED_LINUX-5.4-3.5.8.0-ubuntu18.04-x86_64.tgz
    +├── Miniconda3-py37_4.12.0-Linux-x86_64.sh
    +├── pip.conf
    +└── tensorflow_gpu-2.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    +
  10. Create the container image. Run the following command in the directory where the Dockerfile is stored to build the container image tensorflow:2.10.0-ofed-cuda11.2:
    1
    docker build . -t tensorflow:2.10.0-ofed-cuda11.2
    +
    + +
    +
    The following log shows that the image has been created.
    Successfully tagged tensorflow:2.10.0-ofed-cuda11.2
    +
    +
+
+

Step 6 Uploading the Image to SWR

  1. Log in to the SWR console and select the target region.
    Figure 2 SWR console
    +
  2. Click Create Organization in the upper right corner and enter an organization name to create an organization. Customize the organization name. Replace the organization name deep-learning in subsequent commands with the actual organization name.
    Figure 3 Creating an organization
    +
  3. Click Generate Login Command in the upper right corner to obtain a login command.
    Figure 4 Login Command
    +
  4. Log in to the local environment as the root user and enter the login command.
  5. Upload the image to SWR.
    1. Tag the uploaded image.
      # Replace the region, domain, as well as organization name deep-learning with the actual values.
      +sudo docker tag tensorflow:2.10.0-ofed-cuda11.2 swr.{region-id}.{domain}/deep-learning/tensorflow:2.10.0-ofed-cuda11.2
      +
    2. Run the following command to upload the image:
      # Replace the region, domain, as well as organization name deep-learning with the actual values.
      +sudo docker push swr.{region-id}.{domain}/deep-learning/tensorflow:2.10.0-ofed-cuda11.2
      +
    +
  6. After the image is uploaded, choose My Images in the navigation pane on the left of the SWR console to view the uploaded custom images.
+
+

Step 7 Creating a Training Job on ModelArts

  1. Log in to the ModelArts management console, check whether access authorization has been configured for your account. For details, see Configuring Agency Authorization. If you have been authorized using access keys, clear the authorization and configure agency authorization.
  2. In the navigation pane, choose Training Management > Training Jobs. The training job list is displayed by default.
  3. Click Create Training Job. On the page that is displayed, configure parameters and click Next.
    • Created By: Custom algorithms
    • Boot Mode: Custom images
    • Image path: image uploaded in Step 6 Uploading the Image to SWR.
    • Code Directory: directory where the boot script file is stored in OBS, for example, obs://test-modelarts/tensorflow/code/. The training code is automatically downloaded to the ${MA_JOB_DIR}/code directory of the training container. code (customizable) is the last-level directory of the OBS path.
    • Boot Command: python ${MA_JOB_DIR}/code/mnist.py. code (customizable) is the last-level directory of the OBS path.
    • Training Input: Click Add Training Input. Enter data_path for the name, select the OBS path to mnist.npz, for example, obs://test-modelarts/tensorflow/data/mnist.npz, and set Obtained from to Hyperparameters.
    • Resource Pool: Select Public resource pools.
    • Resource Type: Select GPU.
    • Compute Nodes: Enter 1.
    • Persistent Log Saving: enabled
    • Job Log Path: OBS path for storing training logs, for example, obs://test-modelarts/tensorflow/log/
    +
  4. Confirm the configurations of the training job and click Submit.
  5. Wait until the training job is created.

    After you submit the job creation request, the system will automatically perform operations on the backend, such as downloading the container image and code directory and running the boot command. A training job requires a certain period of time for running. The duration ranges from dozens of minutes to several hours, varying depending on the service logic and selected resources. After the training job is executed, a log similar to the following is output.

    +
    Figure 5 Run logs of training jobs with GPU specifications
    +
+
+
+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679356920.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679356920.png new file mode 100644 index 00000000..6e43d20f Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679356920.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357116.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357116.png new file mode 100644 index 00000000..f1f72ffa Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357116.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357124.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357124.png new file mode 100644 index 00000000..7c903ff9 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357124.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357172.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357172.png new file mode 100644 index 00000000..421698dd Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357172.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357176.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357176.png new file mode 100644 index 00000000..d784bb75 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357176.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357180.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357180.png new file mode 100644 index 00000000..226c24e7 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357180.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357188.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679357188.png new file mode 100644 index 00000000..174d1f60 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357188.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357192.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357192.png new file mode 100644 index 00000000..cdcdc9a5 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357192.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357196.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357196.png new file mode 100644 index 00000000..62874d47 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357196.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357200.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357200.png new file mode 100644 index 00000000..b4e2d9ed Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357200.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357204.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357204.png new file mode 100644 index 00000000..354b8f6a Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357204.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357208.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357208.png new file mode 100644 index 00000000..48731dbf Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357208.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357304.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357304.png new file mode 100644 index 00000000..cdf9e528 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679357304.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357308.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357308.png new file mode 100644 index 00000000..524340b8 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357308.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357312.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357312.png new file mode 100644 index 00000000..ab890557 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357312.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357340.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357340.png new file mode 100644 index 00000000..471590c5 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357340.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357344.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357344.png new file mode 100644 index 00000000..4a5546ce Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357344.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357348.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357348.png new file mode 100644 index 00000000..2b983873 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357348.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357448.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357448.png new file mode 100644 index 00000000..2f8c1f29 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357448.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357452.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679357452.png new file mode 100644 index 00000000..c2ebac44 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357452.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357956.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357956.png new file mode 100644 index 00000000..70cacc50 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357956.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357960.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357960.png new file mode 100644 index 00000000..018eec69 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357960.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357968.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357968.png new file mode 100644 index 00000000..96942324 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357968.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679357972.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679357972.png new file mode 100644 index 00000000..d0f585b2 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679357972.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679516672.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679516672.png new file mode 100644 index 00000000..49d35dd8 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679516672.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679516920.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679516920.png new file mode 100644 index 00000000..a4587ee4 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679516920.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679516924.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679516924.png new file mode 100644 index 00000000..b4bf1462 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679516924.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679516928.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679516928.png new file mode 100644 index 00000000..0d4c7a0d Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679516928.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679516932.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679516932.png new file mode 100644 index 00000000..e3472cc6 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679516932.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679516936.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679516936.png new file mode 100644 index 00000000..80452456 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679516936.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679516940.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679516940.png new file mode 100644 index 00000000..0afeb3ce Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679516940.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679516952.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679516952.png new file mode 100644 index 00000000..adbb6637 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679516952.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679517044.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679517044.png new file mode 100644 index 00000000..8830411f Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679517044.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679517048.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679517048.png new file mode 100644 index 00000000..1b7c88e3 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679517048.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679517052.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679517052.png new file mode 100644 index 00000000..e6e804b9 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679517052.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679517060.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679517060.png new file mode 100644 index 00000000..88f9db7b Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679517060.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679517104.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679517104.png new file mode 100644 index 00000000..029f4393 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679517104.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679517708.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679517708.png new file mode 100644 index 00000000..52dda793 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679517708.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679172.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679172.png new file mode 100644 index 00000000..53e16117 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679679172.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679176.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679176.png new file mode 100644 index 00000000..697ccb12 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679176.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679180.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679180.png new file mode 100644 index 00000000..f6e5b825 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679180.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679184.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679184.png new file mode 100644 index 00000000..902e6b7d Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679184.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679192.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679192.png new file mode 100644 index 00000000..95399fe2 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679192.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679196.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679196.png new file mode 100644 index 00000000..60a649ba Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679196.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679200.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679200.png new file mode 100644 index 00000000..64a3134a Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679200.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679204.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679679204.png new file mode 100644 index 00000000..279dbe4f Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679204.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679220.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679220.png new file mode 100644 index 00000000..12f19797 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679220.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679232.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679232.png new file mode 100644 index 00000000..199af4f4 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679232.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679236.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679236.png new file mode 100644 index 00000000..9ed33d17 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679236.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679240.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679240.png new file mode 100644 index 00000000..84c0de75 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679240.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679679244.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679679244.png new file mode 100644 index 00000000..7b1483b1 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679679244.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838912.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838912.png new file mode 100644 index 00000000..126a32c4 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679838912.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838916.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838916.png new file mode 100644 index 00000000..965a4957 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838916.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838920.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838920.png new file mode 100644 index 00000000..a95163f6 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838920.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838932.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838932.png new file mode 100644 index 00000000..73f16a43 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838932.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838936.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838936.png new file mode 100644 index 00000000..ce9fd378 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838936.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838940.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838940.png new file mode 100644 index 00000000..179b4a57 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838940.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838944.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838944.png new file mode 100644 index 00000000..2ab75f1e Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838944.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838976.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001679838976.png new file mode 100644 index 00000000..4d321842 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838976.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838980.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838980.png new file mode 100644 index 00000000..6a0a4fcf Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838980.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001679838988.png b/docs/modelarts/best-practice/figure/en-us_image_0000001679838988.png new file mode 100644 index 00000000..03fa1a77 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001679838988.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727355913.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727355913.png new file mode 100644 index 00000000..634a248e Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727355913.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727355917.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727355917.png new file mode 100644 index 00000000..14517545 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727355917.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356109.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356109.png new file mode 100644 index 00000000..f4b50717 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356109.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356165.gif b/docs/modelarts/best-practice/figure/en-us_image_0000001727356165.gif new file mode 100644 index 00000000..2e5ba614 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727356165.gif differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356169.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356169.png new file mode 100644 index 00000000..38c589f7 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356169.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356173.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356173.png new file mode 100644 index 00000000..fd69df78 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356173.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356185.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356185.png new file mode 100644 index 00000000..62574bf2 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356185.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356201.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356201.png new file mode 100644 index 00000000..f3f2f670 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356201.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356297.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356297.png new file mode 100644 index 00000000..95e6e834 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356297.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356301.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356301.png new file mode 100644 index 00000000..86f9d5b9 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356301.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356305.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727356305.png new file mode 100644 index 00000000..ba17b7bc Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356305.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356325.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356325.png new file mode 100644 index 00000000..dbd36799 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356325.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727356957.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727356957.png new file mode 100644 index 00000000..90a67f92 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727356957.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436209.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436209.png new file mode 100644 index 00000000..8806514b Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436209.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436265.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436265.png new file mode 100644 index 00000000..20d63e7a Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436265.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436269.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436269.png new file mode 100644 index 00000000..05c6db2b Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436269.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436285.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436285.png new file mode 100644 index 00000000..629a39e3 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727436285.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436289.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436289.png new file mode 100644 index 00000000..5b30cb47 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436289.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436293.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436293.png new file mode 100644 index 00000000..8465e880 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436293.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436297.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436297.png new file mode 100644 index 00000000..4b19e508 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436297.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436301.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436301.png new file mode 100644 index 00000000..fa1bd2c2 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436301.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436393.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436393.png new file mode 100644 index 00000000..13837414 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436393.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436401.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436401.png new file mode 100644 index 00000000..1ee8d98e Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436401.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436405.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727436405.png new file mode 100644 index 00000000..1ee8d98e Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436405.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436425.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436425.png new file mode 100644 index 00000000..5155f9d3 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436425.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436429.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436429.png new file mode 100644 index 00000000..82542a56 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436429.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436433.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436433.png new file mode 100644 index 00000000..d574a77d Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436433.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436437.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436437.png new file mode 100644 index 00000000..47adbd42 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436437.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436441.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436441.png new file mode 100644 index 00000000..ab08da6a Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436441.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436541.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436541.png new file mode 100644 index 00000000..7f0512e0 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727436541.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727436545.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727436545.png new file mode 100644 index 00000000..43cb779e Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727436545.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727437045.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727437045.png new file mode 100644 index 00000000..ae45b7a1 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727437045.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727437053.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727437053.png new file mode 100644 index 00000000..c88b445e Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727437053.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727437057.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727437057.png new file mode 100644 index 00000000..71527768 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727437057.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718245.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718245.png new file mode 100644 index 00000000..eb4b345e Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718245.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718253.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718253.png new file mode 100644 index 00000000..0b760093 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718253.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718257.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727718257.png new file mode 100644 index 00000000..703d1807 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718257.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718261.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718261.png new file mode 100644 index 00000000..7c590bba Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718261.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718265.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718265.png new file mode 100644 index 00000000..80650868 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718265.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718269.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718269.png new file mode 100644 index 00000000..e78dc6fa Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718269.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718277.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718277.png new file mode 100644 index 00000000..14d689c3 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718277.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718281.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718281.png new file mode 100644 index 00000000..98d7ceeb Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718281.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718285.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718285.png new file mode 100644 index 00000000..c4661ef1 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727718285.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718289.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718289.png new file mode 100644 index 00000000..c4ae4b82 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718289.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718293.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718293.png new file mode 100644 index 00000000..b907be82 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718293.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718305.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718305.png new file mode 100644 index 00000000..008e59fb Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718305.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718309.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718309.png new file mode 100644 index 00000000..52b7abf7 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718309.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718313.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718313.png new file mode 100644 index 00000000..c3b441a0 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718313.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718317.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718317.png new file mode 100644 index 00000000..fa8a35e8 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718317.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718325.png 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727718325.png new file mode 100644 index 00000000..82fb0241 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718325.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727718329.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727718329.png new file mode 100644 index 00000000..a6ad6eb5 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727718329.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798149.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798149.png new file mode 100644 index 00000000..cfd0d60b Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727798149.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798165.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798165.png new file mode 100644 index 00000000..f6e5b825 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727798165.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798169.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798169.png new file mode 100644 index 00000000..a441a153 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727798169.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798189.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798189.png new file mode 100644 index 00000000..22917224 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727798189.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798193.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798193.png new file mode 100644 index 00000000..c061ae47 Binary files /dev/null and 
b/docs/modelarts/best-practice/figure/en-us_image_0000001727798193.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798209.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798209.png new file mode 100644 index 00000000..374c5ffe Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727798209.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798213.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798213.png new file mode 100644 index 00000000..48ed9ac8 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727798213.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798217.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798217.png new file mode 100644 index 00000000..12b13d0e Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727798217.png differ diff --git a/docs/modelarts/best-practice/figure/en-us_image_0000001727798221.png b/docs/modelarts/best-practice/figure/en-us_image_0000001727798221.png new file mode 100644 index 00000000..c3b251b5 Binary files /dev/null and b/docs/modelarts/best-practice/figure/en-us_image_0000001727798221.png differ diff --git a/docs/modelarts/best-practice/modelarts_04_0203.html b/docs/modelarts/best-practice/modelarts_04_0203.html new file mode 100644 index 00000000..c72470f8 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_04_0203.html @@ -0,0 +1,62 @@ + + +

Enabling an Inference Service to Access the Internet

+

This section describes how to enable an inference service to access the Internet.

+

Application Scenarios

An inference service accesses the Internet in the following scenarios:

+ +
+

Solution Design

Use the algorithm on the instance where the inference service is deployed to access the Internet.

+
Figure 1 Networking for an inference service to access the Internet
+
+

Procedure

+
  1. Configure the network for the ModelArts resource pool.

    When purchasing a dedicated resource pool, you can select inference services in Job Type. In this case, the selected network must be accessible to the target VPC.
    Figure 2 Purchasing a dedicated resource pool
    +
    +
    Figure 3 Interconnecting the VPC
    +

    Interconnecting a VPC enables the ModelArts resource pool to exchange data with your VPC.

    +

  2. Install and configure a forward proxy for your VPC.

    Before installing a forward proxy, purchase an ECS with the latest Ubuntu image and bind an EIP to the ECS. Then, log in to the ECS, and install and configure a squid forward proxy.

    +
    1. If Docker is not installed, run the following command to install it:
      curl -sSL https://get.daocloud.io/docker | sh
      +
    2. Pull the squid image.
      docker pull ubuntu/squid
      +
    3. Create a host directory and configure whitelist.conf and squid.conf.

      Create a host directory:

      +
      mkdir -p /etc/squid/
      +

      Add the whitelist.conf configuration file. The content is the addresses that can be accessed. For example:

      +
      .apig.cn-east-3.huaweicloudapis.com
      +

      Add the squid.conf configuration file, which includes the following:

      +
      # An ACL named 'whitelist'
      +acl whitelist dstdomain "/etc/squid/whitelist.conf"
      + 
      +# Allow whitelisted URLs through
      +http_access allow whitelist
      + 
      +# Block the rest
      +http_access deny all
      + 
      +# Default port
      +http_port 3128
      +

      Set the permissions on the host directory and configuration files:

      +
      chmod 640 -R /etc/squid
      +
    4. Start a squid instance.
      docker run -d --name squid -e TZ=UTC -v /etc/squid:/etc/squid -p 3128:3128 ubuntu/squid:latest
      +
    5. If whitelist.conf or squid.conf is updated, go to the container and update the squid.
      docker exec -it squid bash
      +root@{container_id}:/# squid -k reconfigure
      +
    +

  3. Configure the DNS proxy and Internet access URL in the algorithm image.

    1. Set the proxy.

      In the code, specify the private IP address and port of the proxy server, as shown in the following:

      +
      proxies = {
      +  "http": "http://{proxy_server_private_ip}:3128",
      +  "https": "http://{proxy_server_private_ip}:3128"
      +}
      +

      The following figure shows how to obtain the private IP address of a server.

      +
      Figure 4 Private IP address
      +
    2. Configure the Internet access URL.

      In the inference code, use the service URL to send a service request, for example:

      +
      https://e8a048ce25136addbbac23ce6132a.apig.cn-east-3.huaweicloudapis.com
      +
    +

+
+
+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_04_0204.html b/docs/modelarts/best-practice/modelarts_04_0204.html new file mode 100644 index 00000000..838e8acf --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_04_0204.html @@ -0,0 +1,30 @@ + + +

End-to-End O&M of Inference Services

+

The end-to-end O&M of ModelArts inference services involves the entire AI process including algorithm development, service O&M, and service running.

+

Overview

End-to-End O&M Process

+ +
Figure 1 End-to-end O&M process for inference services
+

During the entire O&M process, service request failures and high resource usage are monitored. When the resource usage threshold is reached, the system will send an alarm notification to you.

+
Figure 2 Alarming process
+

Advantages

+

End-to-end service O&M enables you to easily check service running at both peak and off-peak hours and detect the health status of real-time services in real time.

+

Constraints

+

End-to-end service O&M applies only to real-time services because Cloud Eye does not monitor batch or edge inference services.

+
+

Procedure

This section uses an occupant safety algorithm in travel as an example to describe how to use ModelArts for process-based service deployment and update, as well as automatic service O&M and monitoring.

+
Figure 3 Occupant safety algorithm implementation
+
+
  1. Use a locally developed model to create a custom image and use the image to create an AI application on ModelArts. For details, see Creating a Custom Image and Using It to Create an AI Application.
  2. On the ModelArts management console, deploy the created AI application as a real-time service.
  3. Log in to the Cloud Eye management console, configure ModelArts alarm rules and enable notifications with a topic subscribed to. For details, see Setting Alarm Rules.

    After the configuration, choose Cloud Service Monitoring > ModelArts in the navigation pane on the left to view the requests and resource usage of the real-time service.

    +
    Figure 4 Viewing service monitoring metrics
    +

    When an alarm is triggered based on the monitored data, the object who has subscribed to the target topic will receive a message notification.

    +

+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_10_0001.html b/docs/modelarts/best-practice/modelarts_10_0001.html new file mode 100644 index 00000000..033ddc53 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_10_0001.html @@ -0,0 +1,57 @@ + + +

Huawei Cloud Mascot Detection (Using ExeML for Object Detection)

+

ModelArts provides ExeML for service developers, freeing you from model development and parameter tuning. With ExeML, you can finish an AI development project in just three steps, including data labeling, auto training, and service deployment.

+

As an example of object detection, this section describes how to detect Yunbao, the mascot of HUAWEI CLOUD, to help you quickly get started with ModelArts. By using the built-in Yunbao image dataset, the system automatically trains and generates a detection model, and deploys the generated model as a real-time service. After the deployment is completed, you can use the real-time service to identify whether an input image contains Yunbao.

+

Before you start, carefully complete the preparations described in Preparations. To use ExeML to build a model, perform the following steps:

+ +

Preparations

+
+

Step 1: Prepare Data

ModelArts provides a sample dataset of Yunbao named Yunbao-Data-Custom. This example uses this dataset to build a model. Perform the following operations to upload the dataset to the OBS directory test-modelarts/dataset-yunbao created in preparation. The OBS bucket name test-modelarts is for reference only. You need to customize an OBS bucket name.

+

If you want to use your own dataset, skip this step, upload the dataset to the OBS folder, and select this directory in Step 2: Create an Object Detection Project.

+
  1. Download the Yunbao-Data-Custom dataset to the local PC.
  2. Decompress the Yunbao-Data-Custom.zip file to the Yunbao-Data-Custom directory on the local PC.
  3. Batch upload all files from the Yunbao-Data-Custom folder to the test-modelarts/dataset-yunbao directory on OBS. For details, see Uploading a File.

    The obtained dataset has two directories: eval and train. The data stored in train is used for model training, and the data stored in eval is used for model prediction.

    +
+
+

Step 2: Create an Object Detection Project

  1. On the ModelArts management console, click ExeML in the left navigation pane.
  2. In the Object Detection box, click Create Project. On the Create Object Detection Project page that is displayed, enter a project name and a dataset name, and select an input dataset path. The OBS path of the Yunbao dataset is /test-modelarts/dataset-yunbao/train/. Select an empty directory in Output Dataset Path.

    The Yunbao dataset has two directories: eval and train. Select the data in the train directory for training. If the upper-layer directory of train is selected, an error message is displayed, indicating that OBS has invalid data. As a result, the project will fail to be created.

    +
    +
    Figure 1 Creating an object detection project
    +
  3. Click Create Project. The object detection project is created. After the project is created, the ExeML > Label Data page is displayed and data source synchronization is automatically performed.
+
+

Step 3: Label Data

For an object detection project, labeling data is to locate an object in an image and assign a label to the object. The labeled data is used for model training. In the Yunbao dataset, part of data has been labeled. You can label the unlabeled data for trial use.

+

Data source synchronization is automatically performed when you create an ExeML project. Data source synchronization takes a certain period of time. If the synchronization fails, you can click Synchronize Data Source to manually execute the synchronization.

+
  1. On the ExeML > Label Data page, click the Unlabeled tab. All unlabeled images are displayed. Click an image to go to the labeling page.
  2. Left-click and drag the mouse to select the area where Yunbao is located. In the dialog box that is displayed, enter the label name, for example, yunbao, and press Enter. After the labeling is completed, the status of the image changes to Labeled in the left Image Catalog pane.
    You can select another image from the image catalog in the lower part of the page and repeat the preceding steps to label the image. If an image contains more than one Yunbao, you can label all of them. You are advised to label all images in the dataset to train a model with better precision.
    Figure 2 Image labeling for object detection
    +
    +
  3. After all images in the image directory are labeled, click the project name in the upper left corner. In the dialog box that is displayed, click OK to save the labeling information. On the Labeled tab page, you can view the labeled images and view the label names and quantity in the right pane.
+
+

Step 4: Generate a Model with ExeML

  1. After data labeling is completed, click Train in the upper right corner of the data labeling page. In the Training Configuration dialog box that is displayed, set related parameters. For details, see Figure 3.
    Figure 3 Setting training parameters
    +
  2. Click Next. On the configuration page that is displayed, confirm the specifications and click Submit to start auto model training. The training takes a certain period of time. If you close or exit the page, the system continues training until it is completed.
    After the training is completed, you can view the training details on the page, such as the accuracy, evaluation result, training parameters, and classification statistics.
    Figure 4 Model training
    +
    +
+
+

Step 5: Deploy the Model as a Real-Time Service

  1. On the Train Model tab page, wait until the training status changes to Completed. Click Deploy in the Version Manager pane.
    Figure 5 Deploying a service
    +
  2. In the displayed Deploy dialog box, set Specifications and Auto Stop, and click OK to deploy the object detection model as a real-time service.

    If you select free specifications, you do not need to set Auto Stop, because the node will be stopped one hour later.

    +
    Figure 6 Deployment settings
    +
  3. After the deployment is started, the system automatically switches to the Deploy Service tab page. This page displays the deployment progress and status.
    The deployment takes a certain period of time. After the deployment is completed, the status in the Version Manager pane changes to Running.
    Figure 7 Successful deployment
    +
    +
+
+

Step 6: Test the Service

After the model is deployed, you can test the service using an image.

+
  1. On the Deployment Online tab page, select a running service version, and click Upload to upload a local image.
    Figure 8 Uploading an image
    +
  2. Select an image from a local environment. The image must contain Yunbao. Click Predict to perform the test.

    After the prediction is completed, the label name yunbao, location coordinates, and confidence score are displayed in the prediction result pane on the right. In the prediction result, detection_boxes indicates the location of the object, and detection_scores indicates the detection score of yunbao.

    +

    If the model accuracy does not meet your expectation, add images on the Label Data tab page, label the images, and train and deploy the model again.

    +

    A running real-time service keeps consuming the resources. If you do not need to use the real-time service, click Stop in the Version Manager pane to stop the service and avoid unnecessary billing. If you want to use the service again, click Start.

    +
    +
    Figure 9 Test result
    +
    +
+
+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_10_0002.html b/docs/modelarts/best-practice/modelarts_10_0002.html new file mode 100644 index 00000000..32715e53 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_10_0002.html @@ -0,0 +1,246 @@ + + +

Bank Deposit Prediction (Using ExeML for Predictive Analytics)

+

Banks often predict whether customers would be interested in a time deposit based on their characteristics, including the age, work type, marital status, education background, housing loan, and personal loan.

+

Now, you can use the ExeML function on ModelArts to predict whether a customer would be interested in the time deposit. The process of using ExeML is as follows:

+
  1. Preparing Data: Download a dataset and upload it to Object Storage Service (OBS) on HUAWEI CLOUD.
  2. Creating a Predictive Analytics Project: Create a predictive analytics project based on the existing dataset.
  3. Training a Model: Preview the data and select the training objective, and then start the model training.
  4. Deploying the Model: Deploy the trained model as a real-time service and test the prediction result.
+

Preparing Data

In this example, the dataset is from the Machine Learning Repository of UCI. For details about the dataset, see Bank Marketing Data Set. Table 1 and Table 2 describe the parameters and sample data of the dataset. You can obtain the dataset from GitHub and upload it to OBS.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameters and meanings of data sources

Parameter

+

Meaning

+

Type

+

Description

+

attr_1

+

Age

+

Int

+

Age of the customer

+

attr_2

+

Occupation

+

String

+

Occupation of the customer

+

attr_3

+

Marital status

+

String

+

Marital status of the customer

+

attr_4

+

Education background

+

String

+

Education background of the customer

+

attr_5

+

Real estate

+

String

+

Real estate of the customer

+

attr_6

+

Loan

+

String

+

Loan of the customer

+

attr_7

+

Deposit

+

String

+

Deposit of the customer

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Sample data of the dataset

attr_1

+

attr_2

+

attr_3

+

attr_4

+

attr_5

+

attr_6

+

attr_7

+

31

+

blue-collar

+

married

+

secondary

+

yes

+

no

+

no

+

41

+

management

+

married

+

tertiary

+

yes

+

yes

+

no

+

38

+

technician

+

single

+

secondary

+

yes

+

no

+

no

+

39

+

technician

+

single

+

secondary

+

yes

+

no

+

yes

+

39

+

blue-collar

+

married

+

secondary

+

yes

+

no

+

no

+

39

+

services

+

single

+

unknown

+

yes

+

no

+

no

+
+
+
  1. Download the ModelArts-Lab project from GitHub and obtain the train.csv training data file from the \ModelArts-Lab-master\official_examples\Using_ModelArts_to_Create_a_Bank_Marketing_Application\data directory of the project.
  2. Upload the train.csv file to OBS, for example, to the test-modelarts/bank-marketing directory. For details about how to upload files to OBS, see Uploading a File.
+
+

Creating a Predictive Analytics Project

  1. On the ModelArts management console, click ExeML in the left navigation pane.
    Figure 1 ExeML
    +
  2. On the ExeML page, click Create Project in the Predictive Analytics area.
  3. On the Create Predictive Analytics Project page, set the project name and select the OBS path where the training data is stored. In this example, the dataset path is test-modelarts/bank-marketing/train.csv. Click Create Now. The data labeling page is displayed, as shown in Figure 2.
    Figure 2 Creating a predictive analytics project
    +
+
+

Training a Model

  1. On the data labeling page, preview the data and select the training objective on the page displayed. The training objective here is to determine whether the customer will apply for a deposit (specified in attr_7). Set Label Column Data Type to Discrete value. After the training objective is specified, click Train.

    For successful prediction, make sure that the label column is attr_7 and that the data in the label column is of the discrete type.

    +
    +
    Figure 3 Selecting the training objective
    +
  2. In the displayed Training Configuration dialog box, select an instance flavor used for training, click Next to check the training configuration, and click Submit to start model training.

    The training takes a certain period of time. If you close or exit the page, the system continues training until it is complete.

    +
    Figure 4 Training Configuration
    +
  3. In the upper left corner of the model training page, if the status of the training job changes to Completed, the training job is complete. The Training Details area on the right shows the details about the training job.
    Figure 5 Training job completed
    +
+
+

Deploying the Model

  1. On the Train Model page, click Deploy in the Version Manager area. Then, the system starts to deploy the service and switches to the Deploy Service page.
    Figure 6 Deploy
    +
    • Specifications: Compute-intensive 3 instance (CPU)
    • Compute Nodes: The default value is 1.
    • Auto Stop: Set the auto stop time as required. This function is enabled by default.
    +
  2. Click Next and then Submit.
  3. Wait until the deployment is completed.
  4. In the Version Manager area, when the status changes to Running, the service has been deployed. You can test the service in the prediction area.

    The following shows the test code. As shown in Figure 7, the prediction result is "predict": "no", indicating that the customer will not apply for a deposit.

    +

    +
    {
    +
    +  "data": 
    +  {
    +    "count": 1,
    +    "req_data": 
    +	[
    +      {
    +        "attr_1": "34",
    +        "attr_2": "blue-collar",
    +        "attr_3": "single",
    +        "attr_4": "tertiary",
    +        "attr_5": "no",
    +        "attr_6": "no"
    +      }
    +    ]
    +  }
    +}
    +

    +
    Figure 7 Testing the model
    +
  5. If an error message is displayed on the prediction page, indicating the prediction failure, the label column may be incorrectly selected on the Label Data page. Ensure that the label column is attr_7 and the data in the label column is of the discrete type. Then, perform training and service deployment again.
    Figure 8 Prediction failure
    +
    Figure 9 Expected configuration of the label column and data type
    +
+
+
+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_10_0062.html b/docs/modelarts/best-practice/modelarts_10_0062.html new file mode 100644 index 00000000..f303a3d8 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_10_0062.html @@ -0,0 +1,116 @@ + + +

Scenarios

+

Certain ModelArts functions require the permission to access other services. This section describes how to assign specific permissions to IAM users when they use ModelArts.

+

Permissions

The permissions of IAM users are controlled by their tenant user. Logging in as a tenant user, you can assign permissions to the target user group through IAM. Then, the permissions are assigned to all members in the user group. The following authorization list uses the system-defined policies of ModelArts and other services as an example.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Service authorization

Target Service

+

Description

+

IAM Permission

+

Mandatory

+

ModelArts

+

Assign permissions to IAM users for using ModelArts.

+

The users with the ModelArts CommonOperations permission can only use resources, but cannot create, update, or delete any dedicated resource pool. You are advised to assign this permission to IAM users.

+

ModelArts CommonOperations

+

Yes

+

The users with the ModelArts FullAccess permission have all access permissions, including creating, updating, and deleting dedicated resource pools. Exercise caution when selecting this option.

+

ModelArts FullAccess

+

No

+

Select either ModelArts FullAccess or ModelArts CommonOperations.

+

Object Storage Service (OBS)

+

Assign permissions to IAM users for using OBS. ModelArts data management, development environments, training jobs, and model deployment require OBS for forwarding data.

+

OBS OperateAccess

+

Yes

+

Software Repository for Container (SWR)

+

Assign permissions to IAM users for using SWR. ModelArts custom images require the SWR FullAccess permission.

+

SWR OperateAccess

+

Yes

+

Key Management Service (KMS)

+

To use remote SSH of ModelArts notebook, IAM users require KMS authorization.

+

KMS CMKFullAccess

+

No

+

Intelligent EdgeFabric (IEF)

+

Assign permissions to IAM users for using IEF. Tenant administrator permissions are required so that ModelArts edge services depending on IEF can be used.

+

Tenant Administrator

+

No

+

Cloud Eye

+

Assign permissions to IAM users for using Cloud Eye. Using Cloud Eye, you can view the running statuses of ModelArts real-time services and AI application loads, and set monitoring alarms.

+

CES FullAccess

+

No

+

Simple Message Notification (SMN)

+

Assign permissions to IAM users for using SMN. SMN is used with Cloud Eye.

+

SMN FullAccess

+

No

+

Virtual Private Cloud (VPC)

+

During the creation of a dedicated resource pool for ModelArts, IAM users require VPC permissions so that they can customize networks.

+

VPC FullAccess

+

No

+

Scalable File Service (SFS)

+
  

SFS Turbo FullAccess

+

SFS FullAccess

+
  
+
+
+
+
+ +
+ diff --git a/docs/modelarts/best-practice/modelarts_10_0072.html b/docs/modelarts/best-practice/modelarts_10_0072.html new file mode 100644 index 00000000..88eacb96 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_10_0072.html @@ -0,0 +1,122 @@ + + +

Creating a Custom Image and Using It to Create an AI Application

+

If you want to use an AI engine that is not supported by ModelArts, create a custom image for the engine, import the image to ModelArts, and use the image to create AI applications. This section describes how to use a custom image to create an AI application and deploy the application as a real-time service.

+

The process is as follows:

+
  1. Building an Image Locally: Create a custom image package locally. For details, see Custom Image Specifications for Creating AI Applications.
  2. Verifying the Image Locally and Uploading It to SWR: Verify the APIs of the custom image and upload the custom image to SWR.
  3. Using the Custom Image to Create an AI Application: Import the image to ModelArts AI application management.
  4. Deploying the AI Application as a Real-Time Service: Deploy the model as a real-time service.
+

Building an Image Locally

This section uses a Linux x86_x64 host as an example. You can purchase an ECS of the same specifications or use an existing local host to create a custom image.

+
For details about how to purchase an ECS, see Purchasing and Logging In to a Linux ECS. When creating the ECS, select an Ubuntu 18.04 public image.
Figure 1 Creating an ECS using an x86 public image
+
+
  1. After logging in to the host, install Docker. For details, see Docker official documents. Alternatively, run the following commands to install Docker:
    curl -fsSL get.docker.com -o get-docker.sh
    +sh get-docker.sh
    +
  2. Obtain the base image. Ubuntu 18.04 is used in this example.
    docker pull ubuntu:18.04
    +
  3. Create the self-define-images folder, and edit Dockerfile and test_app.py in the folder for the custom image. In the sample code, the application code runs on the Flask framework.
    The file structure is as follows:
    self-define-images/
    +    --Dockerfile
    +    --test_app.py
    +
    +
    • Dockerfile
      From ubuntu:18.04
      +# Configure the HUAWEI CLOUD source and install Python, Python3-PIP, and Flask.
      +RUN cp -a /etc/apt/sources.list /etc/apt/sources.list.bak && \
      +  sed -i "s@http://.*security.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
      +  sed -i "s@http://.*archive.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \
      +  apt-get update && \
      +  apt-get install -y python3 python3-pip && \
      +  pip3 install  --trusted-host https://repo.huaweicloud.com -i https://repo.huaweicloud.com/repository/pypi/simple  Flask
      +
      +# Copy the application code to the image.
      +COPY test_app.py /opt/test_app.py
      +
      +# Specify the boot command of the image.
      +CMD python3  /opt/test_app.py
      +
    • test_app.py
      from flask import Flask, request
      +import json 
      +app = Flask(__name__)
      +
      +@app.route('/greet', methods=['POST'])
      +def say_hello_func():
      +    print("----------- in hello func ----------")
      +    data = json.loads(request.get_data(as_text=True))
      +    print(data)
      +    username = data['name']
      +    rsp_msg = 'Hello, {}!'.format(username)
      +    return json.dumps({"response":rsp_msg}, indent=4)
      +
      +@app.route('/goodbye', methods=['GET'])
      +def say_goodbye_func():
      +    print("----------- in goodbye func ----------")
      +    return '\nGoodbye!\n'
      +
      +
      +@app.route('/', methods=['POST'])
      +def default_func():
      +    print("----------- in default func ----------")
      +    data = json.loads(request.get_data(as_text=True))
      +    return '\n called default func !\n {} \n'.format(str(data))
      +
      +# host must be "0.0.0.0", port must be 8080
      +if __name__ == '__main__':
      +    app.run(host="0.0.0.0", port=8080)
      +
    +
  4. Switch to the self-define-images folder and run the following command to create custom image test:v1:
    docker build -t test:v1 .
    +
  5. Run docker images to view the custom image you have created.
+
+

Verifying the Image Locally and Uploading It to SWR

  1. Run the following command in the local environment to start the custom image:
    docker run -it -p 8080:8080 test:v1
    +
    Figure 2 Starting a custom image
    +
  2. Open another terminal and run the following commands to test the functions of the three APIs of the custom image:
    curl -X POST -H "Content-Type: application/json" --data '{"name":"Tom"}'  127.0.0.1:8080/
    +curl -X POST -H "Content-Type: application/json" --data '{"name":"Tom"}' 127.0.0.1:8080/greet
    +curl -X GET 127.0.0.1:8080/goodbye
    +

    If information similar to the following is displayed, the function verification is successful.

    +
    Figure 3 Testing API functions
    +
  1. Upload the custom image to SWR. For details, see How Can I Upload Images to SWR?
  2. View the uploaded image on the My Images > Private Images page of the SWR console.
    Figure 4 Uploaded images
    +
+
+

Using the Custom Image to Create an AI Application

Import a meta model. For details, see Creating and Importing a Model Image. Key parameters are as follows:
  • Meta Model Source: Select Container image.
    • Container Image Path: Select the created private image.
      Figure 5 Created private image
      +
    • Container API: Protocol and port number for starting a model. Ensure that the protocol and port number are the same as those provided in the custom image.
    • Image Replication: indicates whether to copy the model image in the container image to ModelArts. This parameter is optional.
    • Health Check: checks health status of a model. This parameter is optional. This parameter is configurable only when the health check API is configured in the custom image. Otherwise, creating the AI application will fail.
    +
  • APIs: APIs of a custom image. This parameter is optional. The model APIs must comply with ModelArts specifications. For details, see Specifications for Compiling the Model Configuration File.
    The configuration file is as follows:
    [{
    +        "url": "/",
    +        "method": "post",
    +        "request": {
    +            "Content-type": "application/json"
    +        },
    +        "response": {
    +            "Content-type": "application/json"
    +        }
    +    },
    +{
    +        "url": "/greet",
    +        "method": "post",
    +        "request": {
    +            "Content-type": "application/json"
    +        },
    +        "response": {
    +            "Content-type": "application/json"
    +        }
    +    },
    +{
    +        "url": "/goodbye",
    +        "method": "get",
    +        "request": {
    +            "Content-type": "application/json"
    +        },
    +        "response": {
    +            "Content-type": "application/json"
    +        }
    +    }
    +]
    +
    +
+
+
+

Deploying the AI Application as a Real-Time Service

  1. Deploy the AI application as a real-time service. For details, see Deploying as a Real-Time Service.
  2. View the details about the real-time service.
    Figure 6 Usage Guides
    +
  3. Access the real-time service on the Prediction tab page.
    Figure 7 Accessing a real-time service
    +
+
+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_10_0080.html b/docs/modelarts/best-practice/modelarts_10_0080.html new file mode 100644 index 00000000..1dca6984 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_10_0080.html @@ -0,0 +1,545 @@ + + +

Using a Custom Algorithm to Build a Handwritten Digit Recognition Model

+

This section describes how to modify a local custom algorithm to train and deploy models on ModelArts.

+

Scenarios

This case describes how to use PyTorch 1.8 to recognize handwritten digit images. An official MNIST dataset is used in this case.

+

Through this case, you can learn how to create training jobs, deploy an inference model, and perform prediction on ModelArts.

+
+

Process

Before performing the following operations, complete necessary operations. For details, see Preparations.

+
  1. Step 1 Prepare the Training Data: Download the MNIST dataset.
  2. Step 2 Prepare Training Files and Inference Files: Write training and inference code.
  3. Step 3 Create an OBS Bucket and Upload Files to OBS: Create an OBS bucket and folder, and upload the dataset, training script, inference script, and inference configuration file to OBS.
  4. Step 4 Create a Training Job: Train a model.
  5. Step 5 Deploy the Model for Inference: Import the trained model to ModelArts, create an AI application, and deploy the AI application as a real-time service.
  6. Step 6 Perform Prediction: Upload a handwritten digit image and send an inference request to obtain the inference result.
  7. Step 7 Release Resources: Stop the service and delete the data in OBS to stop billing.
+
+

Preparations

+
+

Step 1 Prepare the Training Data

An MNIST dataset downloaded from the MNIST official website is used in this case. Ensure that the four files are all downloaded.

+
Figure 3 MNIST dataset
+
  • train-images-idx3-ubyte.gz: compressed package of the training set, which contains 60,000 samples
  • train-labels-idx1-ubyte.gz: compressed package of the training set labels, which contains the labels of the 60,000 samples
  • t10k-images-idx3-ubyte.gz: compressed package of the validation set, which contains 10,000 samples
  • t10k-labels-idx1-ubyte.gz: compressed package of the validation set labels, which contains the labels of the 10,000 samples
+
+

If you are asked to enter the login information after you click the MNIST official website link, copy and paste this link in the address box of your browser: http://yann.lecun.com/exdb/mnist/

+

The login information is required when you open the link in HTTPS mode, which is not required if you open the link in HTTP mode.

+
+
+

Step 2 Prepare Training Files and Inference Files

In this case, ModelArts provides the training script, inference script, and inference configuration file.

+

When pasting code from a .py file, create a .py file. Otherwise, the error message "SyntaxError: 'gbk' codec can't decode byte 0xa4 in position 324: illegal multibyte sequence" may be displayed.

+
+

Create the training script train.py on the local host. The content is as follows:

+
# base on https://github.com/pytorch/examples/blob/main/mnist/main.py
+
+from __future__ import print_function
+
+import os
+import gzip
+import codecs
+import argparse
+from typing import IO, Union
+
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torchvision import datasets, transforms    
+from torch.optim.lr_scheduler import StepLR
+
+import shutil
+
+
+# Define a network model.
+class Net(nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv2d(1, 32, 3, 1)
+        self.conv2 = nn.Conv2d(32, 64, 3, 1)
+        self.dropout1 = nn.Dropout(0.25)
+        self.dropout2 = nn.Dropout(0.5)
+        self.fc1 = nn.Linear(9216, 128)
+        self.fc2 = nn.Linear(128, 10)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = F.relu(x)
+        x = self.conv2(x)
+        x = F.relu(x)
+        x = F.max_pool2d(x, 2)
+        x = self.dropout1(x)
+        x = torch.flatten(x, 1)
+        x = self.fc1(x)
+        x = F.relu(x)
+        x = self.dropout2(x)
+        x = self.fc2(x)
+        output = F.log_softmax(x, dim=1)
+        return output
+
+
+# Train the model. Set the model to the training mode, load the training data, calculate the loss function, and perform gradient descent.
+def train(args, model, device, train_loader, optimizer, epoch):
+    model.train()
+    for batch_idx, (data, target) in enumerate(train_loader):
+        data, target = data.to(device), target.to(device)
+        optimizer.zero_grad()
+        output = model(data)
+        loss = F.nll_loss(output, target)
+        loss.backward()
+        optimizer.step()
+        if batch_idx % args.log_interval == 0:
+            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+                epoch, batch_idx * len(data), len(train_loader.dataset),
+                100. * batch_idx / len(train_loader), loss.item()))
+            if args.dry_run:
+                break
+
+
+# Validate the model. Set the model to the validation mode, load the validation data, and calculate the loss function and accuracy.
+def test(model, device, test_loader):
+    model.eval()
+    test_loss = 0
+    correct = 0
+    with torch.no_grad():
+        for data, target in test_loader:
+            data, target = data.to(device), target.to(device)
+            output = model(data)
+            test_loss += F.nll_loss(output, target, reduction='sum').item()
+            pred = output.argmax(dim=1, keepdim=True)
+            correct += pred.eq(target.view_as(pred)).sum().item()
+
+    test_loss /= len(test_loader.dataset)
+
+    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
+        test_loss, correct, len(test_loader.dataset),
+        100. * correct / len(test_loader.dataset)))
+
+
+# The following is PyTorch MNIST.
+# https://github.com/pytorch/vision/blob/v0.9.0/torchvision/datasets/mnist.py
+def get_int(b: bytes) -> int:
+    return int(codecs.encode(b, 'hex'), 16)
+
+
+def open_maybe_compressed_file(path: Union[str, IO]) -> Union[IO, gzip.GzipFile]:
+    """Return a file object that possibly decompresses 'path' on the fly.
+       Decompression occurs when argument `path` is a string and ends with '.gz' or '.xz'.
+    """
+    if not isinstance(path, torch._six.string_classes):
+        return path
+    if path.endswith('.gz'):
+        return gzip.open(path, 'rb')
+    if path.endswith('.xz'):
+        return lzma.open(path, 'rb')
+    return open(path, 'rb')
+
+
+SN3_PASCALVINCENT_TYPEMAP = {
+    8: (torch.uint8, np.uint8, np.uint8),
+    9: (torch.int8, np.int8, np.int8),
+    11: (torch.int16, np.dtype('>i2'), 'i2'),
+    12: (torch.int32, np.dtype('>i4'), 'i4'),
+    13: (torch.float32, np.dtype('>f4'), 'f4'),
+    14: (torch.float64, np.dtype('>f8'), 'f8')
+}
+
+
+def read_sn3_pascalvincent_tensor(path: Union[str, IO], strict: bool = True) -> torch.Tensor:
+    """Read a SN3 file in "Pascal Vincent" format (Lush file 'libidx/idx-io.lsh').
+       Argument may be a filename, compressed filename, or file object.
+    """
+    # read
+    with open_maybe_compressed_file(path) as f:
+        data = f.read()
+    # parse
+    magic = get_int(data[0:4])
+    nd = magic % 256
+    ty = magic // 256
+    assert 1 <= nd <= 3
+    assert 8 <= ty <= 14
+    m = SN3_PASCALVINCENT_TYPEMAP[ty]
+    s = [get_int(data[4 * (i + 1): 4 * (i + 2)]) for i in range(nd)]
+    parsed = np.frombuffer(data, dtype=m[1], offset=(4 * (nd + 1)))
+    assert parsed.shape[0] == np.prod(s) or not strict
+    return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
+
+
+def read_label_file(path: str) -> torch.Tensor:
+    with open(path, 'rb') as f:
+        x = read_sn3_pascalvincent_tensor(f, strict=False)
+    assert(x.dtype == torch.uint8)
+    assert(x.ndimension() == 1)
+    return x.long()
+
+
+def read_image_file(path: str) -> torch.Tensor:
+    with open(path, 'rb') as f:
+        x = read_sn3_pascalvincent_tensor(f, strict=False)
+    assert(x.dtype == torch.uint8)
+    assert(x.ndimension() == 3)
+    return x
+
+
+def extract_archive(from_path, to_path):
+    to_path = os.path.join(to_path, os.path.splitext(os.path.basename(from_path))[0])
+    with open(to_path, "wb") as out_f, gzip.GzipFile(from_path) as zip_f:
+        out_f.write(zip_f.read())
+# The above is pytorch mnist.
+# --- end
+
+
+# Raw MNIST dataset processing
+def convert_raw_mnist_dataset_to_pytorch_mnist_dataset(data_url):
+    """
+    raw
+
+    {data_url}/
+        train-images-idx3-ubyte.gz
+        train-labels-idx1-ubyte.gz
+        t10k-images-idx3-ubyte.gz
+        t10k-labels-idx1-ubyte.gz
+
+    processed
+
+    {data_url}/
+        train-images-idx3-ubyte.gz
+        train-labels-idx1-ubyte.gz
+        t10k-images-idx3-ubyte.gz
+        t10k-labels-idx1-ubyte.gz
+        MNIST/raw
+            train-images-idx3-ubyte
+            train-labels-idx1-ubyte
+            t10k-images-idx3-ubyte
+            t10k-labels-idx1-ubyte
+        MNIST/processed
+            training.pt
+            test.pt
+    """
+    resources = [
+        "train-images-idx3-ubyte.gz",
+        "train-labels-idx1-ubyte.gz",
+        "t10k-images-idx3-ubyte.gz",
+        "t10k-labels-idx1-ubyte.gz"
+    ]
+
+    pytorch_mnist_dataset = os.path.join(data_url, 'MNIST')
+
+    raw_folder = os.path.join(pytorch_mnist_dataset, 'raw')
+    processed_folder = os.path.join(pytorch_mnist_dataset, 'processed')
+
+    os.makedirs(raw_folder, exist_ok=True)
+    os.makedirs(processed_folder, exist_ok=True)
+
+    print('Processing...')
+
+    for f in resources:
+        extract_archive(os.path.join(data_url, f), raw_folder)
+
+    training_set = (
+        read_image_file(os.path.join(raw_folder, 'train-images-idx3-ubyte')),
+        read_label_file(os.path.join(raw_folder, 'train-labels-idx1-ubyte'))
+    )
+    test_set = (
+        read_image_file(os.path.join(raw_folder, 't10k-images-idx3-ubyte')),
+        read_label_file(os.path.join(raw_folder, 't10k-labels-idx1-ubyte'))
+    )
+    with open(os.path.join(processed_folder, 'training.pt'), 'wb') as f:
+        torch.save(training_set, f)
+    with open(os.path.join(processed_folder, 'test.pt'), 'wb') as f:
+        torch.save(test_set, f)
+
+    print('Done!')
+
+
+def main():
+    # Define the preset running parameters of the training job.
+    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
+
+    parser.add_argument('--data_url', type=str, default=False,
+                        help='mnist dataset path')
+    parser.add_argument('--train_url', type=str, default=False,
+                        help='mnist model path')
+
+    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
+                        help='input batch size for training (default: 64)')
+    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
+                        help='input batch size for testing (default: 1000)')
+    parser.add_argument('--epochs', type=int, default=14, metavar='N',
+                        help='number of epochs to train (default: 14)')
+    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
+                        help='learning rate (default: 1.0)')
+    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
+                        help='Learning rate step gamma (default: 0.7)')
+    parser.add_argument('--no-cuda', action='store_true', default=False,
+                        help='disables CUDA training')
+    parser.add_argument('--dry-run', action='store_true', default=False,
+                        help='quickly check a single pass')
+    parser.add_argument('--seed', type=int, default=1, metavar='S',
+                        help='random seed (default: 1)')
+    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
+                        help='how many batches to wait before logging training status')
+    parser.add_argument('--save-model', action='store_true', default=True,
+                        help='For Saving the current Model')
+    args = parser.parse_args()
+
+    use_cuda = not args.no_cuda and torch.cuda.is_available()
+
+    torch.manual_seed(args.seed)
+
+    # Set whether to use GPU or CPU to run the algorithm.
+    device = torch.device("cuda" if use_cuda else "cpu")
+
+    train_kwargs = {'batch_size': args.batch_size}
+    test_kwargs = {'batch_size': args.test_batch_size}
+    if use_cuda:
+        cuda_kwargs = {'num_workers': 1,
+                       'pin_memory': True,
+                       'shuffle': True}
+        train_kwargs.update(cuda_kwargs)
+        test_kwargs.update(cuda_kwargs)
+
+    # Define the data preprocessing method. 
+    transform=transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.1307,), (0.3081,))
+        ])
+
+    # Convert the raw MNIST dataset to a PyTorch MNIST dataset.
+    convert_raw_mnist_dataset_to_pytorch_mnist_dataset(args.data_url)
+
+    # Create a training dataset and a validation dataset.
+    dataset1 = datasets.MNIST(args.data_url, train=True, download=False,
+                       transform=transform)
+    dataset2 = datasets.MNIST(args.data_url, train=False, download=False,
+                       transform=transform)
+
+    # Create iterators for the training dataset and the validation dataset.
+    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
+    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
+
+    # Initialize the neural network model and copy the model to the compute device. 
+    model = Net().to(device)
+    # Define the training optimizer and learning rate for gradient descent calculation.
+    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
+    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
+
+    # Train the neural network and perform validation in each epoch.
+    for epoch in range(1, args.epochs + 1):
+        train(args, model, device, train_loader, optimizer, epoch)
+        test(model, device, test_loader)
+        scheduler.step()
+
+    # Save the model and make it adapted to the ModelArts inference model package specifications.
+    if args.save_model:
+
+        # Create the model directory in the path specified in train_url.
+        model_path = os.path.join(args.train_url, 'model')
+        os.makedirs(model_path, exist_ok = True)
+
+        # Save the model to the model directory based on the ModelArts inference model package specifications.
+        torch.save(model.state_dict(), os.path.join(model_path, 'mnist_cnn.pt'))
+
+        # Copy the inference code and configuration file to the model directory. 
+        the_path_of_current_file = os.path.dirname(__file__)
+        shutil.copyfile(os.path.join(the_path_of_current_file, 'infer/customize_service.py'), os.path.join(model_path, 'customize_service.py'))
+        shutil.copyfile(os.path.join(the_path_of_current_file, 'infer/config.json'), os.path.join(model_path, 'config.json'))
+
+if __name__ == '__main__':
+    main()
+

Create the inference script customize_service.py on the local host. The content is as follows:

+
import os
+import log
+import json
+
+import torch.nn.functional as F
+import torch.nn as nn
+import torch
+import torchvision.transforms as transforms
+
+import numpy as np
+from PIL import Image
+
+from model_service.pytorch_model_service import PTServingBaseService
+
+logger = log.getLogger(__name__)
+
+# Define model preprocessing.
+infer_transformation = transforms.Compose([
+    transforms.Resize(28),
+    transforms.CenterCrop(28),
+    transforms.ToTensor(),
+    transforms.Normalize((0.1307,), (0.3081,))
+])
+
+# Model inference service
+class PTVisionService(PTServingBaseService):
+
+    def __init__(self, model_name, model_path):
+        # Call the constructor of the parent class.
+        super(PTVisionService, self).__init__(model_name, model_path)
+
+        # Call the customized function to load the model.
+        self.model = Mnist(model_path)
+
+         # Load labels.
+        self.label = [0,1,2,3,4,5,6,7,8,9]
+    
+    # Receive the request data and convert it to the input format acceptable to the model.
+    def _preprocess(self, data):
+        preprocessed_data = {}
+        for k, v in data.items():
+            input_batch = []
+            for file_name, file_content in v.items():
+                with Image.open(file_content) as image1:
+                    # Gray processing
+                    image1 = image1.convert("L")
+                    if torch.cuda.is_available():
+                        input_batch.append(infer_transformation(image1).cuda())
+                    else:
+                        input_batch.append(infer_transformation(image1))
+            input_batch_var = torch.autograd.Variable(torch.stack(input_batch, dim=0), volatile=True)
+            print(input_batch_var.shape)
+            preprocessed_data[k] = input_batch_var
+
+        return preprocessed_data
+
+    # Post-process the inference result to obtain the expected output format. The result is the returned value.
+    def _postprocess(self, data):
+        results = []
+        for k, v in data.items():
+            result = torch.argmax(v[0])
+            result = {k: self.label[result]}
+            results.append(result)
+        return results
+
+    # Perform forward inference on the input data to obtain the inference result.
+    def _inference(self, data):
+
+        result = {}
+        for k, v in data.items():
+            result[k] = self.model(v)
+
+        return result
+
+# Define a network.
+class Net(nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv2d(1, 32, 3, 1)
+        self.conv2 = nn.Conv2d(32, 64, 3, 1)
+        self.dropout1 = nn.Dropout(0.25)
+        self.dropout2 = nn.Dropout(0.5)
+        self.fc1 = nn.Linear(9216, 128)
+        self.fc2 = nn.Linear(128, 10)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = F.relu(x)
+        x = self.conv2(x)
+        x = F.relu(x)
+        x = F.max_pool2d(x, 2)
+        x = self.dropout1(x)
+        x = torch.flatten(x, 1)
+        x = self.fc1(x)
+        x = F.relu(x)
+        x = self.dropout2(x)
+        x = self.fc2(x)
+        output = F.log_softmax(x, dim=1)
+        return output
+
+
+def Mnist(model_path, **kwargs):
+    # Generate a network.
+    model = Net()
+
+    # Load the model.
+    if torch.cuda.is_available():
+        device = torch.device('cuda')
+        model.load_state_dict(torch.load(model_path, map_location="cuda:0"))
+    else:
+        device = torch.device('cpu')
+        model.load_state_dict(torch.load(model_path, map_location=device))
+
+    # CPU or GPU mapping
+    model.to(device)
+
+    # Turn the model to inference mode.
+    model.eval()
+
+    return model
+

Create the inference configuration file config.json on the local host. The content is as follows:

+
{
+    "model_algorithm": "image_classification",
+    "model_type": "PyTorch",
+    "runtime": "pytorch_1.8.0-cuda_10.2-py_3.7-ubuntu_18.04-x86_64"
+}
+
+

Step 3 Create an OBS Bucket and Upload Files to OBS

Upload the data, code file, inference code file, and inference configuration file obtained from Step 2 to an OBS bucket. When a training job runs on ModelArts, the data and code files are read from the OBS bucket.

+
  1. Log in to the OBS console and create an OBS bucket and folder. Figure 4 shows an example of the created objects. For details, see Creating a Bucket and Creating a Folder.
    {OBS bucket}                     # OBS bucket name, which is customizable, for example, test-modelarts-xx
    +      -{OBS folder}          # OBS folder name, which is customizable, for example, pytorch
    +          - mnist-data      # OBS folder, which is used to store the training dataset. The folder name is customizable, for example, mnist-data.
    +          - mnist-code      # OBS folder, which is used to store training script train.py. The folder name is customizable, for example, mnist-code.
    +              - infer       # OBS folder, which is used to store inference script customize_service.py and configuration file config.json
    +          - mnist-output    # OBS folder, which is used to store trained models. The folder name is customizable, for example, mnist-output.
    +
    • The region where the created OBS bucket resides must be the same as that where ModelArts is used. Otherwise, the OBS bucket will be unavailable for training. For details, see Check whether the OBS bucket and ModelArts are in the same region.
    • When creating an OBS bucket, do not set the archive storage class. Otherwise, training models will fail.
    +
    +
    Figure 4 OBS file directory
    +

    +
  2. Upload the MNIST dataset package obtained in Step 1 Prepare the Training Data to OBS. For details, see Uploading a File.
    • When uploading data to OBS, do not encrypt the data. Otherwise, the training will fail.
    • Files do not need to be decompressed. Directly upload compressed packages to OBS.
    +
    +
    Figure 5 Uploading a dataset to the mnist-data folder
    +

    +
  3. Upload the training script train.py to the mnist-code folder.
    Figure 6 Uploading the training script train.py to the mnist-code folder
    +
  4. Upload the inference script customize_service.py and inference configuration file config.json to the infer folder.
    Figure 7 Uploading customize_service.py and config.json to the infer folder
    +
+
+

Step 4 Create a Training Job

  1. Log in to the ModelArts management console and select the same region as the OBS bucket.
  2. In the navigation pane on the left, choose Settings and check whether access authorization has been configured for the current account. For details, see Configuring Access Authorization. If you have been authorized using access keys, clear the authorization and configure agency authorization.
  3. In the navigation pane on the left, choose Training Management > Training Jobs. On the displayed page, click Create Training Job.
    Figure 8 Training Jobs
    +
  4. Set parameters.
    • Algorithm Type: Select Custom algorithm.
    • Boot Mode: Select Preset image and then select PyTorch and pytorch_1.8.0-cuda_10.2-py_3.7-ubuntu_18.04-x86_64 from the drop-down lists.
    • Code Directory: Select the created OBS code directory, for example, /test-modelarts-xx/pytorch/mnist-code/ (replace test-modelarts-xx with your OBS bucket name).
    • Boot File: Select the training script train.py uploaded to the code directory.
    • Input: Add one input and set its name to data_url. Set the data path to your OBS directory, for example, /test-modelarts-xx/pytorch/mnist-data/ (replace test-modelarts-xx with your OBS bucket name).
    • Output: Add one output and set its name to train_url. Set the data path to your OBS directory, for example, /test-modelarts-xx/pytorch/mnist-output/ (replace test-modelarts-xx with your OBS bucket name). Do not pre-download to a local directory.
    • Resource Type: Select GPU and then GPU: 1*NVIDIA-V100(16GB) | CPU: 8 vCPUs 64GB (example). If there are free GPU specifications, you can select them for training.
    • Retain default settings for other parameters.

      The sample code runs on a single node with a single card. If you select a flavor with multiple GPUs, the training will fail.

      +
      +
    +
    Figure 9 Training job settings
    +
    Figure 10 Setting training input and output
    +
    Figure 11 Configuring the resource type
    +
  5. Click Submit, confirm parameter settings for the training job, and click Yes.
  6. The system automatically switches back to the Training Jobs page. When the training job status changes to Completed, the model training is completed.

    In this case, the training job will take more than 10 minutes.

    +
    +
  7. Click the training job name. On the job details page that is displayed, check whether there are error messages in logs. If so, the training failed. Identify the cause and locate the fault based on the logs.
  8. In the lower left corner of the training details page, click the training output path to go to OBS (as shown in Figure 12). Then, check whether the model folder is available and whether there are any trained models in the folder (as shown in Figure 13). If there is no model folder or trained model, the training input may be incomplete. In this case, completely upload the training data and train the model again.
    Figure 12 Output path
    +
    Figure 13 Trained model
    +
+
+

Step 5 Deploy the Model for Inference

After the model training is complete, create an AI application and deploy it as a real-time service.

+
  1. Log in to the ModelArts management console. In the navigation pane on the left, choose AI Application Management > AI Applications. On the My AI Applications page, click Create.
  2. On the Create page, configure parameters and click Create now.

    Choose Training Job for Meta Model Source. Select the training job completed in Step 4 Create a Training Job from the drop-down list and select Dynamic loading. The values of AI Engine will be automatically configured.

    +
    Figure 14 Meta Model Source
    +

    If you have used Training Jobs of an old version, you can see both Training Jobs and Training Jobs New below Training job. In this case, select Training Jobs New.

    +
    +
  3. On the AI Applications page, if the application status changes to Normal, it has been created. Click the option button on the left of the AI application name to display the version list at the bottom of the list page, and choose Deploy > Real-Time Services in the Operation column to deploy the AI application as a real-time service.
    Figure 15 Deploying a real-time service
    +
  4. On the Deploy page, configure parameters and create a real-time service as prompted. In this example, use CPU specifications. If there are free CPU specifications, you can select them for deployment. (Each user can deploy only one real-time service for free. If you have deployed one, delete it first before deploying a new one for free.)
    Figure 16 Deploying a model
    +

    After you submit the service deployment request, the system automatically switches to the Real-Time Services page. When the service status changes to Running, the service has been deployed.

    +
    Figure 17 Deployed service
    +
+
+

Step 6 Perform Prediction

  1. On the Real-Time Services page, click the name of the real-time service. The real-time service details page is displayed.
  2. Click the Prediction tab, set Request Type to multipart/form-data, Request Parameter to image, click Upload to upload a sample image, and click Predict.

    After the prediction is complete, the prediction result is displayed in the Test Result pane. According to the prediction result, the digit on the image is 2.

    +

    The MNIST used in this case is a simple dataset used for demonstration, and its algorithms are also simple neural network algorithms used for teaching. The models generated using such data and algorithms are applicable only to teaching but not to complex prediction scenarios. The prediction is accurate only if the image used for prediction is similar to the image in the training dataset (white characters on black background).

    +
    +
    Figure 18 Example

    +
    +
    Figure 19 Prediction results
    +
+
+

Step 7 Release Resources

If you do not need to use this model and real-time service anymore, release the resources to stop billing.
  • On the Real-Time Services page, locate the row containing the target service and click Stop or Delete in the Operation column.
  • On the AI Applications page in AI Application Management, locate the row containing the target service and click Delete in the Operation column.
  • On the Training Jobs page, click Delete in the Operation column to delete the finished training job.
  • Go to OBS and delete the OBS bucket, folders, and files used in this example.
+
+
+

FAQs

+
+
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_10_0150.html b/docs/modelarts/best-practice/modelarts_10_0150.html new file mode 100644 index 00000000..ba2f95c4 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_10_0150.html @@ -0,0 +1,19 @@ + + + +

Permissions Management

+ +

+
+ +
+ +
+ diff --git a/docs/modelarts/best-practice/modelarts_24_0078.html b/docs/modelarts/best-practice/modelarts_24_0078.html new file mode 100644 index 00000000..08ca24d3 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0078.html @@ -0,0 +1,98 @@ + + +

Basic Concepts

+

ModelArts allows you to configure fine-grained permissions for refined management of resources and permissions. This is commonly used by large enterprises, but it is complex for individual users. It is recommended that individual users configure permissions for using ModelArts by referring to Assigning Permissions to Individual Users for Using ModelArts.

+

If you meet any of the following conditions, read this document.

+
  • You are an enterprise user, and
    • There are multiple departments in your enterprise, and you need to control users' permissions so that users in different departments can access only their dedicated resources and functions.
    • There are multiple roles (such as administrators, algorithm developers, and application O&M personnel) in your enterprise. You need them to use only specific functions.
    • There are logically multiple environments (such as the development environment, pre-production environment, and production environment) and are isolated from each other. You need to control users' permissions on different environments.
    • You need to control permissions of specific IAM user or user group.
    +
  • You are an individual user, and you have created multiple IAM users. You need to assign different ModelArts permissions to different IAM users.
  • You need to understand the concepts and operations of ModelArts permissions management.
+
+

ModelArts uses Identity and Access Management (IAM) for most permissions management functions. Before reading below, learn about Basic Concepts. This helps you better understand this document.

+

To implement fine-grained permissions management, ModelArts provides permission control, agency authorization, and workspace. The following describes the details.

+

ModelArts Permissions and Agencies

Figure 1 Permissions management
+

Exposed ModelArts functions are controlled through IAM permissions. For example, if you as an IAM user need to create a training job on ModelArts, you must have the modelarts:trainJob:create permission. For details about how to assign permissions to a user (you need to add the user to a user group and then assign permissions to the user group), see Permissions Management.

+

ModelArts must access other services for AI computing. For example, ModelArts must access OBS to read your data for training. For security purposes, ModelArts must be authorized to access other cloud services. This is agency authorization.

+

The following summarizes permissions management:

+ +
+

ModelArts Permissions Management

By default, new IAM users do not have any permissions assigned. You need to add a user to one or more groups, and assign permissions policies or roles to these groups. Users inherit permissions of the groups to which they are added. This process is called authorization. After authorization, users can perform operations on ModelArts based on permissions.

+

ModelArts is a project-level service deployed and accessed in specific physical regions. When you authorize an agency, you can set the scope for the permissions you select to all resources, enterprise projects, or region-specific projects. If you specify region-specific projects, the selected permissions will be applied to resources in these projects.

+

For details, see Creating a User Group and Assigning Permissions.

+
+

+

When assigning permissions to a user group, IAM does not directly assign specific permissions to the user group. Instead, IAM needs to add the permissions to a policy and then assign the policy to the user group. To facilitate user permissions management, each cloud service provides some preset policies for you to directly use. If the preset policies cannot meet your requirements of fine-grained permissions management, you can customize policies.

+
Table 1 lists all the preset system-defined policies supported by ModelArts. +
+ + + + + + + + + + + + + +
Table 1 System-defined policies supported by ModelArts

Policy

+

Description

+

Type

+

ModelArts FullAccess

+

Administrator permissions for ModelArts. Users granted these permissions can operate and use ModelArts.

+

System-defined policy

+

ModelArts CommonOperations

+

Common user permissions for ModelArts. Users granted these permissions can operate and use ModelArts, but cannot manage dedicated resource pools.

+

System-defined policy

+
+
+
+

Generally, ModelArts FullAccess is assigned only to administrators. If fine-grained management is not required, assigning ModelArts CommonOperations to all users will meet the development requirements of most small teams. If you want to customize policies for fine-grained permissions management, see IAM.

+

When you assign ModelArts permissions to a user, the system does not automatically assign the permissions of other services to the user. This ensures security and prevents unexpected unauthorized operations. In this case, however, you must separately assign permissions of different services to users so that they can perform some ModelArts operations.

+

For example, if an IAM user needs to use OBS data for training and the ModelArts training permission has been configured for the IAM user, the IAM user still needs to be assigned with the OBS read, write, and list permissions. The OBS list permission allows you to select the training data path on ModelArts. The read permission is used to preview data and read data for training. The write permission is used to save training results and logs.

+
  • For individual users or small organizations, it is a good practice to configure the Tenant Administrator policy that applies to global services for IAM users. In this way, IAM users can obtain all user permissions except IAM. However, this may cause security issues. (For an individual user, its default IAM user belongs to the admin user group and has the Tenant Administrator permission.)
  • If you want to restrict user operations, configure the minimum permissions of OBS for ModelArts users. For details, see OBS Permissions Management. For details about fine-grained permissions management of other cloud services, see the corresponding cloud service documents.
+
+
+

+

ModelArts Agency Authorization

ModelArts must be authorized by users to access other cloud services for AI computing. In the IAM permission system, such authorization is performed through agencies.

+

For details about the basic concepts and operations of agencies, see Cloud Service Delegation.

+

To simplify agency authorization, ModelArts supports automatic agency authorization configuration. You only need to configure an agency for yourself or specified users on the Global Configuration page of the ModelArts console.

+
  • Only users with the IAM agency management permission can perform this operation. Generally, members in the IAM admin user group have this permission.
  • ModelArts agency authorization is region-specific, which means that you must perform agency authorization in each region you use.
+
+
Figure 2 Settings
+

On the Global Configuration page of the ModelArts console, after you click Add Authorization, you can configure an agency for a specific user or all users. Generally, an agency named modelarts_agency_<Username>_Random ID is created by default. In the Permissions area, you can select the preset permission configuration or select the required policies. If both options cannot meet your requirements, you can create an agency on the IAM management page (you need to delegate ModelArts to access your resources), and then use an existing agency instead of adding an agency on the Add Authorization page.

+

ModelArts associates multiple users with one agency. This means that if two users need to configure the same agency, you do not need to create an agency for each user. Instead, you only need to configure the same agency for the two users.

+
Figure 3 Mapping between users and agencies
+

Each user can use ModelArts only after being associated with an agency. However, even if the permissions assigned to the agency are insufficient, no error is reported when the API is called. An error occurs only when the system uses unauthorized functions. For example, you enable message notification when creating a training job. Message notification requires SMN authorization. However, an error occurs only when messages need to be sent for the training job. The system ignores some errors, and other errors may cause job failures. When you implement permission minimization, ensure that you will still have sufficient permissions for the required operations on ModelArts.

+
+
+

Strict Authorization

In strict authorization mode, explicit authorization by the account administrator is required for IAM users to access ModelArts. The administrator can add the required ModelArts permissions to common users through authorization policies.

+

In non-strict authorization mode, IAM users can use ModelArts without explicit authorization. The administrator needs to configure the deny policy for IAM users to prevent them from using some ModelArts functions.

+

The administrator can change the authorization mode on the Global Configuration page.

+

The strict authorization mode is recommended. In this mode, IAM users must be authorized to use ModelArts functions. In this way, the permission scope of IAM users can be accurately controlled, minimizing permissions granted to IAM users.

+
+
+

Managing Resource Access Using Workspaces

Workspace enables enterprise customers to split their resources into multiple spaces that are logically isolated and to manage access to different spaces. As an enterprise user, you can submit the request for enabling the workspace function to your technical support manager.

+

After workspace is enabled, a default workspace is created. All resources you have created are in this workspace. A workspace is like a ModelArts twin. You can switch between workspaces in the upper left corner of the ModelArts console. Jobs in different workspaces do not affect each other.

+

When creating a workspace, you must bind it to an enterprise project. Multiple workspaces can be bound to the same enterprise project, but one workspace cannot be bound to multiple enterprise projects. You can use workspaces for refined restrictions on resource access and permissions of different users. The restrictions are as follows:

+ +
  • Restrictions on workspaces and permission authorization take effect at the same time. That is, a user must have both the permission to access the workspace and the permission to create training jobs (the permission applies to this workspace) so that the user can submit training jobs in this workspace.
  • If you have enabled an enterprise project but have not enabled a workspace, all operations are performed in the default enterprise project. Ensure that the permissions on the required operations apply to the default enterprise project.
  • The preceding restrictions do not apply to users who have not enabled any enterprise project.
+
+
+

Summary

Key features of ModelArts permissions management:

+ +
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0079.html b/docs/modelarts/best-practice/modelarts_24_0079.html new file mode 100644 index 00000000..1074db37 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0079.html @@ -0,0 +1,20 @@ + + +

Permission Management Mechanisms

+

+
+
+ + + +
+ diff --git a/docs/modelarts/best-practice/modelarts_24_0080.html b/docs/modelarts/best-practice/modelarts_24_0080.html new file mode 100644 index 00000000..069966ea --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0080.html @@ -0,0 +1,212 @@ + + +

IAM

+

This section describes the IAM permission configurations for all ModelArts functions.

+

IAM Permissions

If no fine-grained authorization policy is configured for a user created by the administrator, the user has all permissions of ModelArts by default. To control user permissions, the administrator needs to add the user to a user group on IAM and configure fine-grained authorization policies for the user group. In this way, the user obtains the permissions defined in the policies before performing operations on cloud service resources.

+

You can grant users permissions by using roles and policies.

+ +

ModelArts does not support role-based authorization. It supports only policy-based authorization.

+

Policy Structure

+

A policy consists of a version and one or more statements (indicating different actions).

+
Figure 1 Policy structure
+

Policy Parameters

+

The following describes policy parameters. You can create custom policies by specifying the parameters. For details, see Custom Policy Use Cases.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Policy parameters

Parameter

+

Description

+

Value

+

Version

+

Policy version

+

1.1: indicates policy-based access control.

+

Statement: authorization statement of a policy

+

Effect

+

Whether to allow or deny the operations defined in the action

+
  • Allow: indicates the operation is allowed.
  • Deny: indicates the operation is not allowed.
    NOTE:

    If the policy used to grant user permissions contains both Allow and Deny for the same action, Deny takes precedence.

    +
    +
+

Action

+

Operation to be performed on the service

+

Format: "Service name:Resource type:Action". Wildcard characters (*) are supported, indicating all options.

+

Example:

+

modelarts:notebook:list: indicates the permission to view a notebook instance list. modelarts indicates the service name, notebook indicates the resource type, and list indicates the operation.

+

View all actions of a service in its API Reference.

+

Condition

+

Condition for a policy to take effect, including condition keys and operators

+

Format: "Condition operator:{Condition key:[Value 1,Value 2]}"

+

If you set multiple conditions, the policy takes effect only when all the conditions are met.

+

Example:

+

"StringEndWithIfExists":{"g:UserName":["specialCharacter"]}: The statement is valid for users whose names end with specialCharacter.

+

Resource

+

Resources on which a policy takes effect

+

Format: Service name:Region:Account ID:Resource type:Resource path. Wildcard characters (*) are supported, indicating all resources.

+
NOTE:

ModelArts authorization does not allow you to specify a resource path.

+
+
+
+
+

ModelArts Resource Types

During policy-based authorization, the administrator can select the authorization scope based on ModelArts resource types. The following table lists the resource types supported by ModelArts:

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 ModelArts resource types

Resource Type

+

Description

+

notebook

+

Notebook instances in DevEnviron

+

exemlProject

+

ExeML projects

+

exemlProjectInf

+

ExeML-powered real-time inference service

+

exemlProjectTrain

+

ExeML-powered training jobs

+

exemlProjectVersion

+

ExeML project version

+

workflow

+

Workflow

+

pool

+

Dedicated resource pool

+

network

+

Networking of a dedicated resource pool

+

trainJob

+

Training job

+

trainJobLog

+

Runtime logs of a training job

+

trainJobInnerModel

+

Preset model

+

trainJobVersion

+

Version of a training job (supported by old-version training jobs that will be discontinued soon)

+

trainConfig

+

Configuration of a training job (supported by old-version training jobs that will be discontinued soon)

+

tensorboard

+

Visualization job of training results (supported by old-version training jobs that will be discontinued soon)

+

model

+

Models

+

service

+

Real-time service

+

nodeservice

+

Edge service

+

workspace

+

Workspace

+

dataset

+

Dataset

+

dataAnnotation

+

Dataset labels

+

aiAlgorithm

+

Algorithm for training jobs

+

image

+

Image

+
+
+
+

ModelArts Resource Permissions

For details, see "Permissions Policies and Supported Actions" in ModelArts API Reference.

+ +
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0081.html b/docs/modelarts/best-practice/modelarts_24_0081.html new file mode 100644 index 00000000..fe1d0f62 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0081.html @@ -0,0 +1,1075 @@ + + +

Agencies and Dependencies

+

Function Dependency

Function Dependency Policies

+

When using ModelArts to develop algorithms or manage training jobs, you are required to use other cloud services. For example, before submitting a training job, select OBS paths for storing the dataset and the logs, respectively. Therefore, when configuring fine-grained authorization policies for a user, the administrator must configure dependent permissions so that the user can use required functions.

+

If you use ModelArts as the root user (default IAM user with the same name as the account), the root user has all permissions by default.

+
+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 1 Basic configuration

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Global configuration

+

IAM

+

iam:users:listUsers

+

Obtain a user list. This action is required by the administrator only.

+

Basic function

+

IAM

+

iam:tokens:assume

+

(Mandatory) Use an agency to obtain temporary authentication credentials.

+

Basic function

+

BSS

+

bss:balance:view

+

Show the balance of the current account on the page after resources are created on the ModelArts console.

+
+
+ +
+ + + + + + + + + + + + + + + +
Table 2 Managing workspaces

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Workspace

+

IAM

+

iam:users:listUsers

+

Authorize an IAM user to use a workspace.

+

ModelArts

+

modelarts:*:*delete*

+

Clear resources in a workspace when deleting it.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 3 Managing notebook instances

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Lifecycle management of development environment instances

+

ModelArts

+

modelarts:notebook:create

+

modelarts:notebook:list

+

modelarts:notebook:get

+

modelarts:notebook:update

+

modelarts:notebook:delete

+

modelarts:notebook:start

+

modelarts:notebook:stop

+

modelarts:notebook:updateStopPolicy

+

modelarts:image:delete

+

modelarts:image:list

+

modelarts:image:create

+

modelarts:image:get

+

modelarts:pool:list

+

modelarts:tag:list

+

modelarts:network:get

+

aom:metric:get

+

aom:metric:list

+

aom:alarm:list

+

Start, stop, create, delete, and update an instance.

+

Dynamically mounting storage

+

+

ModelArts

+

modelarts:notebook:listMountedStorages

+

modelarts:notebook:mountStorage

+

modelarts:notebook:getMountedStorage

+

modelarts:notebook:umountStorage

+

Dynamically mount storage.

+

OBS

+

obs:bucket:ListAllMyBuckets

+

obs:bucket:ListBucket

+

Image management

+

ModelArts

+

modelarts:image:register

+

modelarts:image:listGroup

+

Register and view an image on the Image Management page.

+

Saving an image

+

SWR

+

SWR Admin

+

The SWR Admin policy contains the maximum scope of SWR permissions, which can be used to:

+
  • Save a running development environment instance as an image.
  • Create a notebook instance using a custom image.
+

Using the SSH function

+

ECS

+

ecs:serverKeypairs:list

+

ecs:serverKeypairs:get

+

ecs:serverKeypairs:delete

+

ecs:serverKeypairs:create

+

Configure a login key for a notebook instance.

+

Mounting an SFS Turbo file system

+

SFS Turbo

+

SFS Turbo FullAccess

+

Read and write an SFS directory as an IAM user. Mount an SFS file system that is not created by you to a notebook instance using a dedicated resource pool.

+

Viewing all Instances

+

ModelArts

+

modelarts:notebook:listAllNotebooks

+

View development environment instances of all users on the ModelArts management console. This action is required by the development environment instance administrator.

+

IAM

+

iam:users:listUsers

+

Local VS Code plug-in or PyCharm Toolkit

+

ModelArts

+

modelarts:notebook:listAllNotebooks

+

modelarts:trainJob:create

+

modelarts:trainJob:list

+

modelarts:trainJob:update

+

modelarts:trainJobVersion:delete

+

modelarts:trainJob:get

+

modelarts:trainJob:logExport

+

modelarts:workspace:getQuotas (This policy is required if the workspace function is enabled.)

+

Access a notebook instance from local VS Code and submit training jobs.

+

OBS

+

obs:bucket:ListAllMyBuckets

+

obs:bucket:HeadBucket

+

obs:bucket:ListBucket

+

obs:bucket:GetBucketLocation

+

obs:object:GetObject

+

obs:object:GetObjectVersion

+

obs:object:PutObject

+

obs:object:DeleteObject

+

obs:object:DeleteObjectVersion

+

obs:object:ListMultipartUploadParts

+

obs:object:AbortMultipartUpload

+

obs:object:GetObjectAcl

+

obs:object:GetObjectVersionAcl

+

obs:bucket:PutBucketAcl

+

obs:object:PutObjectAcl

+

obs:object:ModifyObjectMetaData

+

IAM

+

iam:projects:listProjects

+

Obtain an IAM project list through local PyCharm for access configurations.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 4 Managing training jobs

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Training management

+

+

ModelArts

+

+

+

modelarts:trainJob:*

+

modelarts:trainJobLog:*

+

modelarts:aiAlgorithm:*

+

modelarts:image:list

+

Create a training job and view training logs.

+

modelarts:workspace:getQuotas

+

Obtain a workspace quota. This policy is required if the workspace function is enabled.

+

modelarts:tag:list

+

Use Tag Management Service (TMS) in a training job.

+

IAM

+

iam:credentials:listCredentials

+

iam:agencies:listAgencies

+

Use the configured agency authorization.

+

SFS Turbo

+

sfsturbo:shares:getShare

+

sfsturbo:shares:getAllShares

+

Use SFS Turbo in a training job.

+

SWR

+

swr:repository:listTags

+

swr:repository:getRepository

+

swr:repository:listRepositories

+

Use a custom image to create a training job.

+

SMN

+

smn:topic:publish

+

smn:topic:list

+

Notify training job status changes through SMN.

+

OBS

+

obs:bucket:ListAllMyBuckets

+

obs:bucket:HeadBucket

+

obs:bucket:ListBucket

+

obs:bucket:GetBucketLocation

+

obs:object:GetObject

+

obs:object:GetObjectVersion

+

obs:object:PutObject

+

obs:object:DeleteObject

+

obs:object:DeleteObjectVersion

+

obs:object:ListMultipartUploadParts

+

obs:object:AbortMultipartUpload

+

obs:object:GetObjectAcl

+

obs:object:GetObjectVersionAcl

+

obs:bucket:PutBucketAcl

+

obs:object:PutObjectAcl

+

obs:object:ModifyObjectMetaData

+

Run a training job using a dataset in an OBS bucket.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 5 Using workflows

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Using a dataset

+

ModelArts

+

modelarts:dataset:getDataset

+

modelarts:dataset:createDataset

+

modelarts:dataset:createDatasetVersion

+

modelarts:dataset:createImportTask

+

modelarts:dataset:updateDataset

+

modelarts:processTask:createProcessTask

+

modelarts:processTask:getProcessTask

+

modelarts:dataset:listDatasets

+

Use ModelArts datasets in a workflow.

+

Managing AI applications

+

ModelArts

+

modelarts:model:list

+

modelarts:model:get

+

modelarts:model:create

+

modelarts:model:delete

+

modelarts:model:update

+

Manage ModelArts AI applications in a workflow.

+

Deploying a service

+

ModelArts

+

modelarts:service:get

+

modelarts:service:create

+

modelarts:service:update

+

modelarts:service:delete

+

modelarts:service:getLogs

+

Manage ModelArts real-time services in a workflow.

+

Training jobs

+

ModelArts

+

modelarts:trainJob:get

+

modelarts:trainJob:create

+

modelarts:trainJob:list

+

modelarts:trainJobVersion:list

+

modelarts:trainJobVersion:create

+

modelarts:trainJob:delete

+

modelarts:trainJobVersion:delete

+

modelarts:trainJobVersion:stop

+

Manage ModelArts training jobs in a workflow.

+

Workspace

+

ModelArts

+

modelarts:workspace:get

+

modelarts:workspace:getQuotas

+

Use ModelArts workspaces in a workflow.

+

Managing data

+

OBS

+

obs:bucket:ListAllMyBuckets (Obtaining a bucket list)

+

obs:bucket:HeadBucket (Obtaining bucket metadata)

+

obs:bucket:ListBucket (Listing objects in a bucket)

+

obs:bucket:GetBucketLocation (Obtaining the bucket location)

+

obs:object:GetObject (Obtaining object content and metadata)

+

obs:object:GetObjectVersion (Obtaining object content and metadata)

+

obs:object:PutObject (Uploading objects using PUT method, uploading objects using POST method, copying objects, appending an object, initializing a multipart task, uploading parts, and merging parts)

+

obs:object:DeleteObject (Deleting an object or batch deleting objects)

+

obs:object:DeleteObjectVersion (Deleting an object or batch deleting objects)

+

obs:object:ListMultipartUploadParts (Listing uploaded parts)

+

obs:object:AbortMultipartUpload (Aborting multipart uploads)

+

obs:object:GetObjectAcl (Obtaining an object ACL)

+

obs:object:GetObjectVersionAcl (Obtaining an object ACL)

+

obs:bucket:PutBucketAcl (Configuring a bucket ACL)

+

obs:object:PutObjectAcl (Configuring an object ACL)

+

Use OBS data in a workflow.

+

Executing a workflow

+

IAM

+

iam:users:listUsers (Obtaining users)

+

iam:agencies:getAgency (Obtaining details about a specified agency)

+

iam:tokens:assume (Obtaining an agency token)

+

Call other ModelArts services when the workflow is running.

+

Integrating DLI

+

DLI

+

dli:jobs:get (Obtaining job details)

+

dli:jobs:list_all (Viewing a job list)

+

dli:jobs:create (Creating a job)

+

Integrate DLI into a workflow.

+

Integrating MRS

+

MRS

+

mrs:job:get (Obtaining job details)

+

mrs:job:submit (Creating and executing a job)

+

mrs:job:list (Viewing a job list)

+

mrs:job:stop (Stopping a job)

+

mrs:job:batchDelete (Batch deleting jobs)

+

mrs:file:list (Viewing a file list)

+

Integrate MRS into a workflow.

+
+
+ +
+ + + + + + + + + + + + + + + +
Table 6 Managing AI applications

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Managing AI applications

+

+

SWR

+

swr:repository:deleteRepository

+

swr:repository:deleteTag

+

swr:repository:getRepository

+

swr:repository:listTags

+

Import a model from a custom image.

+

Use a custom engine when importing a model from OBS.

+

OBS

+

obs:bucket:ListAllMyBuckets (Obtaining a bucket list)

+

obs:bucket:HeadBucket (Obtaining bucket metadata)

+

obs:bucket:ListBucket (Listing objects in a bucket)

+

obs:bucket:GetBucketLocation (Obtaining the bucket location)

+

obs:object:GetObject (Obtaining object content and metadata)

+

obs:object:GetObjectVersion (Obtaining object content and metadata)

+

obs:object:PutObject (Uploading objects using PUT method, uploading objects using POST method, copying objects, appending an object, initializing a multipart task, uploading parts, and merging parts)

+

obs:object:DeleteObject (Deleting an object or batch deleting objects)

+

obs:object:DeleteObjectVersion (Deleting an object or batch deleting objects)

+

obs:object:ListMultipartUploadParts (Listing uploaded parts)

+

obs:object:AbortMultipartUpload (Aborting multipart uploads)

+

obs:object:GetObjectAcl (Obtaining an object ACL)

+

obs:object:GetObjectVersionAcl (Obtaining an object ACL)

+

obs:bucket:PutBucketAcl (Configuring a bucket ACL)

+

obs:object:PutObjectAcl (Configuring an object ACL)

+

Import a model from a template.

+

Specify an OBS path for model conversion.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 7 Managing service deployment

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Deploying a service

+

LTS

+

lts:logs:list (Obtaining the log list)

+

Show LTS logs.

+

Batch services

+

OBS

+

obs:object:GetObject (Obtaining object content and metadata)

+

obs:object:PutObject (Uploading objects using PUT method, uploading objects using POST method, copying objects, appending an object, initializing a multipart task, uploading parts, and merging parts)

+

obs:bucket:CreateBucket (Creating a bucket)

+

obs:bucket:ListBucket (Listing objects in a bucket)

+

obs:bucket:ListAllMyBuckets (Obtaining a bucket list)

+

Create a batch service.

+

Edge services

+

+

CES

+

ces:metricData:list (Obtaining metric data)

+

View monitoring metrics.

+

IEF

+

ief:deployment:delete (Deleting a deployment)

+

Manage edge services.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 8 Managing datasets

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Managing datasets and labels

+

OBS

+

obs:bucket:ListBucket (Listing objects in a bucket)

+

obs:object:GetObject (Obtaining object content and metadata)

+

obs:object:PutObject (Uploading objects using PUT method, uploading objects using POST method, copying objects, appending an object, initializing a multipart task, uploading parts, and merging parts)

+

obs:object:DeleteObject (Deleting an object or batch deleting objects)

+

obs:bucket:HeadBucket (Obtaining bucket metadata)

+

obs:bucket:GetBucketAcl (Obtaining a bucket ACL)

+

obs:bucket:PutBucketAcl (Configuring a bucket ACL)

+

obs:bucket:GetBucketPolicy (Obtaining a bucket policy)

+

obs:bucket:PutBucketPolicy (Configuring a bucket policy)

+

obs:bucket:DeleteBucketPolicy (Deleting a bucket policy)

+

obs:bucket:PutBucketCORS (Configuring or deleting CORS rules of a bucket)

+

obs:bucket:GetBucketCORS (Obtaining the CORS rules of a bucket)

+

obs:object:PutObjectAcl (Configuring an object ACL)

+

Manage datasets in OBS.

+

Label OBS data.

+

Create a data management job.

+

Managing table datasets

+

DLI

+

dli:database:displayAllDatabases

+

dli:database:displayAllTables

+

dli:table:describe_table

+

Manage DLI data in a dataset.

+

Managing table datasets

+

DWS

+

dws:openAPICluster:list

+

dws:openAPICluster:getDetail

+

Manage DWS data in a dataset.

+

Managing table datasets

+

MRS

+

mrs:job:submit

+

mrs:job:list

+

mrs:cluster:list

+

mrs:cluster:get

+

Manage MRS data in a dataset.

+

Auto labeling

+

ModelArts

+

modelarts:service:list

+

modelarts:model:list

+

modelarts:model:get

+

modelarts:model:create

+

modelarts:trainJobInnerModel:list

+

modelarts:workspace:get

+

modelarts:workspace:list

+

Enable auto labeling.

+

Team labeling

+

IAM

+

iam:projects:listProjects (Obtaining tenant projects)

+

iam:users:listUsers (Obtaining users)

+

iam:agencies:createAgency (Creating an agency)

+

iam:quotas:listQuotasForProject (Obtaining the quotas of a project)

+

Manage labeling teams.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 9 Managing resources

Application Scenario

+

Dependent Service

+

Dependent Policy

+

Supported Function

+

Managing resource pools

+

+

BSS

+

bss:coupon:view

+

bss:order:view

+

bss:balance:view

+

bss:discount:view

+

bss:renewal:view

+

bss:bill:view

+

bss:contract:update

+

bss:order:pay

+

bss:unsubscribe:update

+

bss:renewal:update

+

bss:order:update

+

Create, renew, and unsubscribe from a resource pool. Dependent permissions must be configured in the IAM project view.

+

ECS

+

ecs:availabilityZones:list

+

Show AZs. Dependent permissions must be configured in the IAM project view.

+

Network management

+

+

+

VPC

+

vpc:routes:create

+

vpc:routes:list

+

vpc:routes:get

+

vpc:routes:delete

+

vpc:peerings:create

+

vpc:peerings:accept

+

vpc:peerings:get

+

vpc:peerings:delete

+

vpc:routeTables:update

+

vpc:routeTables:get

+

vpc:routeTables:list

+

vpc:vpcs:create

+

vpc:vpcs:list

+

vpc:vpcs:get

+

vpc:vpcs:delete

+

vpc:subnets:create

+

vpc:subnets:get

+

vpc:subnets:delete

+

vpcep:endpoints:list

+

vpcep:endpoints:create

+

vpcep:endpoints:delete

+

vpcep:endpoints:get

+

vpc:ports:create

+

vpc:ports:get

+

vpc:ports:update

+

vpc:ports:delete

+

vpc:networks:create

+

vpc:networks:get

+

vpc:networks:update

+

vpc:networks:delete

+

Create and delete ModelArts networks, and interconnect VPCs. Dependent permissions must be configured in the IAM project view.

+

SFS Turbo

+

sfsturbo:shares:addShareNic

+

sfsturbo:shares:deleteShareNic

+

sfsturbo:shares:showShareNic

+

sfsturbo:shares:listShareNics

+

Interconnect your network with SFS Turbo. Dependent permissions must be configured in the IAM project view.

+

Edge resource pool

+

IEF

+

ief:node:list

+

ief:group:get

+

ief:application:list

+

ief:application:get

+

ief:node:listNodeCert

+

ief:node:get

+

ief:IEFInstance:get

+

ief:deployment:list

+

ief:group:listGroupInstanceState

+

ief:IEFInstance:list

+

ief:deployment:get

+

ief:group:list

+

Add, delete, modify, and search for edge pools.

+
+
+

+
+

Agency authorization

To simplify operations when you use ModelArts to run jobs, certain operations are automatically performed on the ModelArts backend, for example, downloading the datasets in an OBS bucket to a workspace before a training job is started and dumping training job logs to the OBS bucket.

+

ModelArts does not save your token authentication credentials. Before performing operations on your resources (such as OBS buckets) in a backend asynchronous job, you are required to explicitly authorize ModelArts through an IAM agency. ModelArts will use the agency to obtain a temporary authentication credential for performing operations on your resources. For details, see Adding Authorization.

+
Figure 1 Agency authorization
+

As shown in Figure 1, after authorization is configured on ModelArts, ModelArts uses the temporary credential to access and operate your resources, relieving you from some complex and time-consuming operations. The agency credential will also be synchronized to your jobs (including notebook instances and training jobs). You can use the agency credential to access your resources in the jobs.

+
+

You can use either of the following methods to authorize ModelArts using an agency:

+

One-click authorization

+

ModelArts provides one-click automatic authorization. You can quickly configure agency authorization on the Global Configuration page of ModelArts. Then, ModelArts will automatically create an agency for you and configure it in ModelArts.

+

In this mode, the authorization scope is specified based on the preset system policies of dependent services to ensure sufficient permissions for using services. The created agency has almost all permissions of dependent services. If you want to precisely control the scope of permissions granted to an agency, use the second method.

+

Custom authorization

+

The administrator creates different agency authorization policies for different users in IAM, and configures the created agency for ModelArts users. When creating an agency for an IAM user, the administrator specifies the minimum permissions for the agency based on the user's permissions to control the resources that the user can access when they use ModelArts. For details, see Assigning Basic Permissions for Using ModelArts.

+

+

Risks in Unauthorized Operations

+

The agency authorization of a user is independent. Theoretically, the agency authorization scope of a user can be beyond the authorization scope of the authorization policy configured for the user group. Any improper configuration will result in unauthorized operations.

+

To prevent unauthorized operations, only a tenant administrator is allowed to configure agencies for users in the ModelArts global configuration to ensure the security of agency authorization.

+

+

Minimal Agency Authorization

+

When configuring agency authorization, an administrator must strictly control the authorization scope.

+

ModelArts asynchronously and automatically performs operations such as job preparation and clearing. The required agency authorization is within the basic authorization scope. If you use only some functions of ModelArts, the administrator can filter out the basic permissions that are not used according to the agency authorization configuration. Conversely, if you need to obtain resource permissions beyond the basic authorization scope in a job, the administrator can add new permissions to the agency authorization configuration. In a word, the agency authorization scope must be minimized and customized based on service requirements.

+

+

Basic Agency Authorization Scope

+

To customize the permissions for an agency, select permissions based on your service requirements.

+ +
+ + + + + + + + + + + + + + + + + + + +
Table 10 Basic agency authorization for a development environment

Application Scenario

+

Dependent Service

+

Agency Authorization

+

Description

+

Configuration Suggestion

+

JupyterLab

+

OBS

+

obs:object:DeleteObject

+

obs:object:GetObject

+

obs:object:GetObjectVersion

+

obs:bucket:CreateBucket

+

obs:bucket:ListBucket

+

obs:bucket:ListAllMyBuckets

+

obs:object:PutObject

+

obs:bucket:GetBucketAcl

+

obs:bucket:PutBucketAcl

+

obs:bucket:PutBucketCORS

+

Use OBS to upload and download data in JupyterLab through ModelArts notebook.

+

Recommended

+

Development environment monitoring

+

AOM

+

aom:alarm:put

+

Call the AOM API to obtain monitoring data and events of notebook instances and display them in ModelArts notebook.

+

Recommended

+
+
+ +
+ + + + + + + + + + + +
Table 11 Basic agency authorization for training jobs

Application Scenario

+

Dependent Service

+

Agency Authorization

+

Description

+

Training jobs

+

OBS

+

obs:bucket:ListBucket

+

obs:object:GetObject

+

obs:object:PutObject

+

Download data, models, and code before starting a training job.

+

Upload logs and models when a training job is running.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 12 Basic agency authorization for deploying services

Application Scenario

+

Dependent Service

+

Agency Authorization

+

Description

+

Real-time services

+

LTS

+

lts:groups:create

+

lts:groups:list

+

lts:topics:create

+

lts:topics:delete

+

lts:topics:list

+

Configure LTS for reporting logs of real-time services.

+

Batch services

+

OBS

+

obs:bucket:ListBucket

+

obs:object:GetObject

+

obs:object:PutObject

+

Run a batch service.

+

Edge services

+

IEF

+

ief:deployment:list

+

ief:deployment:create

+

ief:deployment:update

+

ief:deployment:delete

+

ief:node:createNodeCert

+

ief:iefInstance:list

+

ief:node:list

+

Deploy an edge service using IEF.

+
+
+ +
+ + + + + + + + + + + + + + + + +
Table 13 Basic agency authorization for managing data

Application Scenario

+

Dependent Service

+

Agency Authorization

+

Description

+

Dataset and data labeling

+

OBS

+

obs:object:GetObject

+

obs:object:PutObject

+

obs:object:DeleteObject

+

obs:object:PutObjectAcl

+

obs:bucket:ListBucket

+

obs:bucket:HeadBucket

+

obs:bucket:GetBucketAcl

+

obs:bucket:PutBucketAcl

+

obs:bucket:GetBucketPolicy

+

obs:bucket:PutBucketPolicy

+

obs:bucket:DeleteBucketPolicy

+

obs:bucket:PutBucketCORS

+

obs:bucket:GetBucketCORS

+

Manage datasets in an OBS bucket.

+

Labeling data

+

ModelArts inference

+

modelarts:service:get

+

modelarts:service:create

+

modelarts:service:update

+

Perform auto labeling based on ModelArts inference.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 14 Basic agency authorization for managing dedicated resource pools

Application Scenario

+

Dependent Service

+

Agency Authorization

+

Description

+

Network management (New version)

+

+

VPC

+

vpc:routes:create

+

vpc:routes:list

+

vpc:routes:get

+

vpc:routes:delete

+

vpc:peerings:create

+

vpc:peerings:accept

+

vpc:peerings:get

+

vpc:peerings:delete

+

vpc:routeTables:update

+

vpc:routeTables:get

+

vpc:routeTables:list

+

vpc:vpcs:create

+

vpc:vpcs:list

+

vpc:vpcs:get

+

vpc:vpcs:delete

+

vpc:subnets:create

+

vpc:subnets:get

+

vpc:subnets:delete

+

vpcep:endpoints:list

+

vpcep:endpoints:create

+

vpcep:endpoints:delete

+

vpcep:endpoints:get

+

vpc:ports:create

+

vpc:ports:get

+

vpc:ports:update

+

vpc:ports:delete

+

vpc:networks:create

+

vpc:networks:get

+

vpc:networks:update

+

vpc:networks:delete

+

Create and delete ModelArts networks, and interconnect VPCs. Dependent permissions must be configured in the IAM project view.

+

SFS Turbo

+

sfsturbo:shares:addShareNic

+

sfsturbo:shares:deleteShareNic

+

sfsturbo:shares:showShareNic

+

sfsturbo:shares:listShareNics

+

Interconnect your network with SFS Turbo. Dependent permissions must be configured in the IAM project view.

+

Managing resource pools

+

BSS

+

bss:coupon:view

+

bss:order:view

+

bss:balance:view

+

bss:discount:view

+

bss:renewal:view

+

bss:bill:view

+

bss:contract:update

+

bss:order:pay

+

bss:unsubscribe:update

+

bss:renewal:update

+

bss:order:update

+

Create, renew, and unsubscribe from a resource pool. Dependent permissions must be configured in the IAM project view.

+

Managing resource pools

+

ECS

+

ecs:availabilityZones:list

+

Show AZs. Dependent permissions must be configured in the IAM project view.

+
+
+
+
+ +
+ diff --git a/docs/modelarts/best-practice/modelarts_24_0082.html b/docs/modelarts/best-practice/modelarts_24_0082.html new file mode 100644 index 00000000..614daaec --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0082.html @@ -0,0 +1,17 @@ + + +

Workspace

+

ModelArts allows you to create multiple workspaces to develop algorithms and manage and deploy models for different service objectives. In this way, the development outputs of different applications are allocated to different workspaces for simplified management.

+

Workspace supports the following types of access control:

+ +

A default workspace is allocated to each IAM project of each account. The access control of the default workspace is PUBLIC.

+

Workspace access control allows the access of only certain users. This function can be used in the following scenarios:

+ +

As an enterprise user, you can submit a request to your technical support to enable the workspace function.

+
+
+ +
+ diff --git a/docs/modelarts/best-practice/modelarts_24_0084.html b/docs/modelarts/best-practice/modelarts_24_0084.html new file mode 100644 index 00000000..84c20404 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0084.html @@ -0,0 +1,26 @@ + + +

Configuration Practices in Typical Scenarios

+

+
+
+ + + +
+ diff --git a/docs/modelarts/best-practice/modelarts_24_0085.html b/docs/modelarts/best-practice/modelarts_24_0085.html new file mode 100644 index 00000000..22cd3403 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0085.html @@ -0,0 +1,72 @@ + + +

Assigning Permissions to Individual Users for Using ModelArts

+

Certain ModelArts functions require access to Object Storage Service (OBS), Software Repository for Container (SWR), and Intelligent EdgeFabric (IEF). Before using ModelArts, your account must be authorized to access these services. Otherwise, these functions will be unavailable.

+

Constraints

+
+

Adding Authorization

  1. Log in to the ModelArts management console. In the left navigation pane, choose Settings. The Global Configuration page is displayed.
  2. Click Add Authorization. On the Add Authorization page that is displayed, configure the parameters. +
    + + + + + + + + + + + + + + + + + + + + + + +
    Table 1 Parameters

    Parameter

    +

    Description

    +

    Authorized User

    +

    Options: IAM user, Federated user, Agency, and All users

    +
    • IAM user: You can use a tenant account to create IAM users and assign permissions for specific resources. Each IAM user has their own identity credentials (password and access keys) and uses cloud resources based on assigned permissions. For details about IAM users, see IAM User.
    • Federated user: A federated user is also called a virtual enterprise user. For details about federated users, see Configuring Federated Identity Authentication.
    • Agency: You can create agencies in IAM. For details about how to create an agency, see Creating an Agency .
    • All users: If you select this option, the agency permissions will be granted to all IAM users under the current account, including those created in the future. For individual users, choose All users.
    +

    Authorized To

    +

    This parameter is not displayed when Authorized User is set to All users.

    +
    • IAM user: Select an IAM user and configure an agency for the IAM user.
      Figure 1 Selecting an IAM user
      +
    • Federated user: Enter the username or user ID of the target federated user.
      Figure 2 Selecting a federated user
      +
    • Agency: Select an agency name. You can use account A to create an agency and configure the agency for account B. When using account B, you can switch the role in the upper right corner of the console to account A and use the agency permissions of account A.
      Figure 3 Switch Role
      +
    +

    Agency

    +
    • Use existing: If there are agencies in the list, select an available one to authorize the selected user. Click the drop-down arrow next to an agency name to view its permission details.
    • Add agency: If there is no available agency, create one. If you use ModelArts for the first time, select Add agency.
    +

    Add agency > Agency Name

    +

The system automatically generates an agency name, which you can change if needed.

    +

    Add agency > Permissions > Common User

    +

    Common User provides the permissions to use all basic ModelArts functions. For example, you can access data, and create and manage training jobs. Select this option generally.

    +

    Click View permissions to view common user permissions.

    +

    Add agency > Permissions > Custom

    +

    If you need refined permissions management, select Custom to flexibly assign permissions to the created agency. You can select permissions from the permission list as required.

    +
    +
    +
  3. Select I have read and agree to the ModelArts Service Statement. Click Create.
+
+

Viewing Authorized Permissions

You can view the configured authorizations on the Global Configuration page. Click View Permissions in the Authorization Content column to view the permission details.

+
Figure 4 View Permissions
+
Figure 5 View Permissions
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0086.html b/docs/modelarts/best-practice/modelarts_24_0086.html new file mode 100644 index 00000000..0920a10f --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0086.html @@ -0,0 +1,24 @@ + + +

Assigning Basic Permissions for Using ModelArts

+

+
+
+ + + +
+ diff --git a/docs/modelarts/best-practice/modelarts_24_0089.html b/docs/modelarts/best-practice/modelarts_24_0089.html new file mode 100644 index 00000000..f3a5c568 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0089.html @@ -0,0 +1,27 @@ + + +

Step 1 Create a User Group and Add Users to the User Group

+

Multiple IAM users can be created under a tenant user, and the permissions of the IAM users are managed by group. This section describes how to create a user group and IAM users and add the IAM users to the user group.

+
  1. Log in to the management console as a tenant user, hover over your username in the upper right corner, and choose Identity and Access Management from the drop-down list to switch to the IAM management console.
    Figure 1 Identity and Access Management
    +

    +
  2. Create a user group. In the navigation pane on the left, choose User Groups. Click Create User Group in the upper right corner. Then, set Name to UserGroup-2 and click OK.
    Figure 2 Creating a user group
    +

    After the user group is created, the system automatically switches to the user group list. Then, you can add existing IAM users to the user group through user group management. If there is no existing IAM user, create users and add them to the user group.

    +
  3. Create IAM users and add them to the user group. In the navigation pane on the left, choose Users. On the displayed page, click Create User in the upper right corner. On the Create User page, add multiple users.
    Set parameters as prompted and click Next.
    Figure 3 Creating multiple users
    +
    +
  4. On the Add User to Group page, select UserGroup-2 and click Create.
    Figure 4 Adding users to the target user group
    +

    The system will automatically add the two users to the target group one by one.

    +
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0090.html b/docs/modelarts/best-practice/modelarts_24_0090.html new file mode 100644 index 00000000..564dfb68 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0090.html @@ -0,0 +1,39 @@ + + +

Step 2 Assign Permissions for Using Cloud Services

+

An IAM user can use cloud services such as ModelArts and OBS only after being granted the required permissions by the tenant user. This section describes how to assign the permissions to use cloud services to all IAM users in a user group.

+
  1. On the user group list page of IAM, click Authorize of the target user group. The Authorize User Group page is displayed.
    Figure 1 Authorize
    +
    Figure 2 Authorize User Group
    +
  2. Before assigning permissions, learn about minimum permissions requirements of each ModelArts module, as shown in Table 1.
  3. Assign permissions for using ModelArts. Search for ModelArts. Select either ModelArts FullAccess or ModelArts CommonOperations.
    The differences between the options are as follows:
    • The users with the ModelArts CommonOperations permission can only use resources, but cannot create, update, or delete any dedicated resource pool. You are advised to assign this permission to IAM users.
    • The users with the ModelArts FullAccess permission have all access permissions, including creating, updating, and deleting dedicated resource pools. Exercise caution when selecting this option.
    +
    Figure 3 Assigning permissions for using ModelArts
    +
    +
  4. Assign permissions for using OBS. Search for OBS and select OBS Administrator. ModelArts training jobs use OBS for forwarding data. Therefore, the permissions for using OBS are required.
    Figure 4 Assigning permissions for using OBS
    +
  5. Assign permissions for using SWR. Search for SWR and select SWR FullAccess. ModelArts custom images require the SWR FullAccess permission.
    Figure 5 Assigning permissions for using SWR
    +
  6. (Optional) Assign the key management permission. Remote SSH of ModelArts notebook requires the key management permission. Search for DEW and select DEW KeypairFullAccess.

    DEW key management permission is configured in the following regions: CN North-Beijing1, CN North-Beijing4, CN East-Shanghai1, CN East-Shanghai2, CN South-Guangzhou, CN Southwest-Guiyang1, CN-Hong Kong, and AP-Singapore. In other regions, the KMS key management permission is configured. In this example, the CN-Hong Kong region is used. Therefore, the DEW key management permission is to be configured.

    +
    Figure 6 DEW key management permission
    +
    Figure 7 KMS key management permission
    +
  7. (Optional) Assign permissions for using IEF. ModelArts requires the Tenant Administrator permission so that edge services depending on IEF can be used.

    Tenant Administrator has the permission to manage all cloud services, not only the ModelArts service. Exercise caution when assigning the Tenant Administrator permission.

    +
    Figure 8 Assigning permissions for using IEF
    +
  8. (Optional) Assign permissions for using Cloud Eye and SMN. On the details page of a ModelArts real-time service deployed for inference, the number of calls is available. Click View Details to obtain more information. If you want to view the overall running status of ModelArts real-time services and AI application loads on Cloud Eye, assign Cloud Eye permissions to IAM users.

    To view monitoring data only, select CES ReadOnlyAccess.

    +
    Figure 9 CES ReadOnlyAccess
    +

    To set alarm monitoring on Cloud Eye, you also need to add CES FullAccess and SMN permissions.

    +
    Figure 10 Assigning alarm monitoring permissions
    +
    Figure 11 Assigning permissions for using SMN
    +
  9. (Optional) Assign permissions for using VPC. To enable custom network configuration when creating a dedicated resource pool, assign permissions for using VPC.
    Figure 12 Assigning permissions for using VPC
    +
  10. (Optional) Assign permissions for using SFS and SFS Turbo. To mount an SFS system to a dedicated resource pool as the storage for the development environment or training, assign the permission to use the SFS system.
    Figure 13 Assigning permissions for using SFS and SFS Turbo
    +
  11. Click View Selected in the upper left corner and confirm the selected permissions.
    Figure 14 Viewing selected permissions
    +
  12. Click Next and set the minimum authorization scope. Select Region-specific projects, select the region to be authorized, and click OK.
  13. A message is displayed, indicating that the authorization is successful. View the authorization information and click Finish. It takes 15 to 30 minutes for the authorization to take effect.
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0091.html b/docs/modelarts/best-practice/modelarts_24_0091.html new file mode 100644 index 00000000..d68caa75 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0091.html @@ -0,0 +1,23 @@ + + +

Step 3 Configure Agent-based ModelArts Access Authorization for the User

+

After assigning IAM permissions, configure ModelArts access authorization for IAM users on the ModelArts page so that ModelArts can access dependent services such as OBS, SWR, and IEF.

+

In agent-based ModelArts access authorization, only tenant users are allowed to configure for their IAM users. Therefore, in this example, the administrator needs to configure access authorization for all the IAM users.

+
  1. Use the tenant account to log in to the ModelArts management console. Select your region in the upper left corner.
  2. In the navigation pane on the left, choose Settings. The Global Configuration page is displayed.
  3. Click Add Authorization. On the Add Authorization page, set Authorized User to All users and select Add agency to configure the agency-based authorization for all IAM users under the tenant account.
    • Common User: You can use basic ModelArts functions, for example, accessing data and creating and managing training jobs, but not to manage resources. Select this option generally.
    • Custom: You can flexibly assign permissions to the created agency. Select this option for refined permissions management. You can select permissions from the permission list as required.
      Figure 1 Common user permissions
      +
    +
  4. Select I have read and agree to the ModelArts Service Statement. Click Create.
    Figure 2 Configured agency authorization
    +
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0092.html b/docs/modelarts/best-practice/modelarts_24_0092.html new file mode 100644 index 00000000..f5f47e2a --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0092.html @@ -0,0 +1,27 @@ + + +

Step 4 Verify User Permissions

+

It takes 15 to 30 minutes for the configured permissions to take effect. Therefore, wait for 30 minutes after the configuration and then verify the configuration.

+
  1. Log in to the ModelArts management console as an IAM user in UserGroup-2. On the login page, ensure that IAM User Login is selected.

    Change the password as prompted upon the first login.

    +
  2. Check ModelArts permissions.
    1. Select the target region in the upper left corner, which must be the same as that in the authorization configuration.

      +
    2. In the navigation pane on the left of the ModelArts management console, choose DevEnviron > Notebook. The ModelArts permissions and agency authorization are configured correctly if no message shows insufficient permissions.
      If the information shown in the following figure is displayed, the ModelArts agency authorization has not been configured. In this case, follow the instructions provided in Step 3 Configure Agent-based ModelArts Access Authorization for the User to configure the authorization.
      Figure 1 Insufficient permissions
      +
      +
    3. In the navigation pane on the left of the ModelArts management console, choose DevEnviron > Notebook and click Create. If this operation is successful, you have obtained ModelArts operation permissions.

      Alternatively, you can try other functions, such as Training Management > Training Jobs. If the operation is successful, you can use ModelArts properly.

      +
    +
  3. Verify OBS permissions.
    1. In the service list in the upper left corner, select OBS. The OBS management console is displayed.
    2. Click Create Bucket in the upper right corner. If this operation is successful, you have obtained OBS operation permissions.
    +
  4. Verify SWR permissions.
    1. In the service list in the upper left corner, select SWR. The SWR management console is displayed.
    2. If an SWR page can be properly displayed, you have obtained SWR operation permissions.
    +
  5. Verify other optional permissions.
  6. Experience ModelArts.
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0093.html b/docs/modelarts/best-practice/modelarts_24_0093.html new file mode 100644 index 00000000..125462c5 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0093.html @@ -0,0 +1,109 @@ + + +

Separately Assigning Permissions to Administrators and Developers

+

In small- and medium-sized teams, administrators need to globally control ModelArts resources, and developers only need to focus on their own instances. By default, a developer account does not have the te_admin permission. The tenant account must configure the required permissions. This section uses notebook as an example to describe how to assign different permissions to administrators and developers through custom policies.

+

Scenarios

To develop a project using notebook, administrators need full control permissions for using ModelArts dedicated resource pools, and access and operation permissions on all notebook instances.

+

To use development environments, developers only need operation permissions for using their own instances and dependent services. They do not need to perform operations on ModelArts dedicated resource pools or view notebook instances of other users.

+
Figure 1 Account relationships
+
+

Configuring Permissions for an Administrator

Assign full control permissions to administrators for using ModelArts dedicated resource pools and all notebook instances. The procedure is as follows:

+
  1. Use a tenant account to create an administrator user group ModelArts_admin_group and add administrator accounts to ModelArts_admin_group. For details, see Step 1 Create a User Group and Add Users to the User Group.
  2. Create a custom policy.

    1. Log in to the management console using an administrator account, hover over your username in the upper right corner, and click Identity and Access Management from the drop-down list to switch to the IAM management console.
      Figure 2 Identity and Access Management
      +
    2. Create custom policy 1 and assign IAM and OBS permissions to the user. In the navigation pane of the IAM console, choose Permissions > Policies/Roles. Click Create Custom Policy in the upper right corner. On the displayed page, enter Policy1_IAM_OBS for Policy Name, select JSON for Policy View, configure the policy content, and click OK.
      Figure 3 Custom policy 1
      +
      The custom policy Policy1_IAM_OBS is as follows, which grants IAM and OBS operation permissions to the user. You can directly copy and paste the content.
      { 
      +    "Version": "1.1", 
      +    "Statement": [ 
      +        { 
      +            "Effect": "Allow", 
      +            "Action": [ 
      +                "iam:users:listUsers",
      +                "iam:projects:listProjects",
      +                "obs:object:PutObject",
      +                "obs:object:GetObject",
      +                "obs:object:GetObjectVersion",
      +                "obs:bucket:HeadBucket",
      +                "obs:object:DeleteObject",
      +                "obs:bucket:CreateBucket",
      +                "obs:bucket:ListBucket"
      +                ] 
      +        }
      +    ] 
      +}
      +
      +
    3. Repeat 2.b to create custom policy 2 and grant the user the permissions to perform operations on dependent services ECS, SWR, MRS, and SMN as well as ModelArts. Set Policy Name to Policy2_AllowOperation and Policy View to JSON, configure the policy content, and click OK.
      The custom policy Policy2_AllowOperation is as follows, which grants the user the permissions to perform operations on dependent services ECS, SWR, MRS, and SMN as well as ModelArts. You can directly copy and paste the content.
      { 
      +    "Version": "1.1", 
      +    "Statement": [ 
      +        { 
      +            "Effect": "Allow", 
      +            "Action": [ 
      +                "ecs:serverKeypairs:list",
      +                "ecs:serverKeypairs:get",
      +                "ecs:serverKeypairs:delete",
      +                "ecs:serverKeypairs:create",
      +                "swr:repository:getNamespace",
      +                "swr:repository:listNamespaces",
      +                "swr:repository:deleteTag",
      +                "swr:repository:getRepository",
      +                "swr:repository:listTags",
      +                "swr:instance:createTempCredential",
      +                "mrs:cluster:get",
      +                "modelarts:*:*"
      +            ] 
      +        }
      +    ] 
      +}
      +
      +
    +

  3. Grant the policy created in 2 to the administrator group ModelArts_admin_group.

    1. In the navigation pane of the IAM console, choose User Groups. On the User Groups page, locate the row that contains ModelArts_admin_group, click Authorize in the Operation column, and select Policy1_IAM_OBS and Policy2_AllowOperation. Click Next.
      Figure 4 Select Policy/Role
      +
    2. Specify the scope as All resources and click OK.
      Figure 5 Select Scope
      +
    +

  4. Configure agent-based ModelArts access authorization for an administrator to allow ModelArts to access dependent services such as OBS.

    1. Log in to the ModelArts management console using a tenant account. In the navigation pane, choose Settings. The Global Configuration page is displayed.
    2. Click Add Authorization. On the Add Authorization page, set Authorized User to IAM user, select an administrator account for Authorized To, select Add agency, and select Common User for Permissions. Permissions control is not required for administrators, so use default setting Common User.
      Figure 6 Configuring authorization for an administrator
      +
    3. Select I have read and agree to the ModelArts Service Statement. Click Create.
    +

  5. Test administrator permissions.

    1. Log in to the ModelArts management console as the administrator. On the login page, ensure that IAM User Login is selected.

      Change the password as prompted upon the first login.

      +
    2. In the navigation pane of the ModelArts management console, choose Dedicated Resource Pools and click Create. If the console does not display a message indicating insufficient permissions, the permissions have been assigned to the administrator.
    +

+
+

Configuring Permissions for a Developer

Use IAM for fine-grained control of developer permissions. The procedure is as follows:

+
  1. Use a tenant account to create a developer user group user_group and add developer accounts to user_group. For details, see Step 1 Create a User Group and Add Users to the User Group.
  2. Create a custom policy.

    1. Log in to the management console using a tenant account, hover over your username in the upper right corner, and click Identity and Access Management from the drop-down list to switch to the IAM management console.
      Figure 7 Identity and Access Management
      +
    2. Create custom policy 3 to prevent users from performing operations on ModelArts dedicated resource pools and viewing notebook instances of other users.

      In the navigation pane of the IAM console, choose Permissions > Policies/Roles. Click Create Custom Policy in the upper right corner. On the displayed page, enter Policy3_DenyOperation for Policy Name, select JSON for Policy View, configure the policy content, and click OK.

      +
      The custom policy Policy3_DenyOperation is as follows. You can copy and paste the content.
      { 
      +    "Version": "1.1", 
      +    "Statement": [ 
      +       	{
      +            "Effect": "deny", 
      +            "Action": [
      +                "modelarts:pool:create",
      +                "modelarts:pool:update",
      +                "modelarts:pool:delete",
      +		"modelarts:notebook:listAllNotebooks"
      +            ]			
      +
      +		}
      +    ] 
      +}
      +
      +
    +

  3. Grant the custom policy to the developer user group user_group.

    1. In the navigation pane of the IAM console, choose User Groups. On the User Groups page, locate the row that contains user_group, click Authorize in the Operation column, and select Policy1_IAM_OBS, Policy2_AllowOperation, and Policy3_DenyOperation. Click Next.
      Figure 8 Select Policy/Role
      +
    2. Specify the scope as All resources and click OK.
      Figure 9 Select Scope
      +
    +

  4. Configure agent-based ModelArts access authorization for a developer to allow ModelArts to access dependent services such as OBS.

    1. Log in to the ModelArts management console using a tenant account. In the navigation pane, choose Settings. The Global Configuration page is displayed.
    2. Click Add Authorization. On the Add Authorization page, set Authorized User to IAM user, select a developer account for Authorized To, add an agency ma_agency_develop_user, set Permissions to Custom, and select OBS Administrator. Developers only need OBS authorization to allow developers to access OBS when using notebook.
      Figure 10 Configuring authorization for a developer
      +
    3. Click Create.
    4. On the Global Configuration page, click Add Authorization again. On the Add Authorization page that is displayed, configure an agency for other developer users.

      On the Add Authorization page, set Authorized User to IAM user, select a developer account for Authorized To, and select the existing agency ma_agency_develop_user created before.

      +
    +

  5. Test developer permissions.

    1. Log in to the ModelArts management console as an IAM user in user_group. On the login page, ensure that IAM User Login is selected.

      Change the password as prompted upon the first login.

      +
    2. In the navigation pane of the ModelArts management console, choose Dedicated Resource Pools and click Create. If the console does not display a message indicating insufficient permissions, the permissions have been assigned to the developer.
      Figure 11 Insufficient permissions
      +
    +

+
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0095.html b/docs/modelarts/best-practice/modelarts_24_0095.html new file mode 100644 index 00000000..06905787 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0095.html @@ -0,0 +1,33 @@ + + +

Viewing the Notebook Instances of All IAM Users Under One Tenant Account

+

Any IAM user granted with the listAllNotebooks and listUsers permissions can click View all on the notebook page to view the instances of all users in the current IAM project.

+

Users granted with these permissions can also access OBS and SWR of all users in the current IAM project.

+
+

Assigning the Required Permissions

  1. Log in to the management console as a tenant user, hover the cursor over your username in the upper right corner, and choose Identity and Access Management from the drop-down list to switch to the IAM management console.
  2. On the IAM console, choose Permissions > Policies/Roles from the navigation pane, click Create Custom Policy in the upper right corner, and create two policies.
    Policy 1: Create a policy that allows users to view all notebook instances of an IAM project, as shown in Figure 1.
    • Policy Name: Enter a custom policy name, for example, Viewing all notebook instances.
    • Policy View: Select Visual editor.
    • Policy Content: Select Allow, ModelArts Service, modelarts:notebook:listAllNotebooks, and default resources.
      Figure 1 Creating a custom policy
      +
    +
    +

    +

    Policy 2: Create a policy that allows users to view all users of an IAM project.

    +
    • Policy Name: Enter a custom policy name, for example, Viewing all users of the current IAM project.
    • Policy View: Select Visual editor.
    • Policy Content: Select Allow, Identity and Access Management, iam:users:listUsers, and default resources.
    +
  3. In the navigation pane, choose User Groups. On the User Groups page, locate the row containing the target user group and click Authorize in the Operation column. On the Authorize User Group page, select the custom policy created in 2 and click Next. Then, select the scope and click OK.

    After the configuration, all users in the user group have the permission to view all notebook instances created by users in the user group.

    +

    If no user group is available, create one, add users to it through user group management, and configure authorization for the user group. If the target user is not in a user group, add the user to a user group through user group management.

    +
+
+

Enabling an IAM User to Start Other User's Notebook Instance

If an IAM user wants to access another IAM user's notebook instance through remote SSH, they need to update the SSH key pair to their own. Otherwise, error ModelArts.6789 will be reported. For details about how to update a key pair, see Modifying the SSH Configuration for a Notebook Instance.

+

ModelArts.6789: Failed to find SSH key pair KeyPair-xxx on the ECS key pair page. Update the key pair and try again later.

+
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0096.html b/docs/modelarts/best-practice/modelarts_24_0096.html new file mode 100644 index 00000000..b64ad606 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0096.html @@ -0,0 +1,33 @@ + + +

Logging In to a Training Container Using Cloud Shell

+

Application Scenarios

You can use Cloud Shell provided by the ModelArts console to log in to a running training container.

+
+

Constraints

Cloud Shell can be used to log in to a training container only when the training job is running and uses a dedicated resource pool.

+
Figure 1 Selecting a dedicated resource pool when creating a training job
+
Figure 2 A running training job
+
+

Preparation: Assigning the Cloud Shell Permission to an IAM User

  1. Log in to the management console as a tenant user, hover the cursor over your username in the upper right corner, and choose Identity and Access Management from the drop-down list to switch to the IAM management console.
  2. On the IAM console, choose Permissions > Policies/Roles from the navigation pane, click Create Custom Policy in the upper right corner, and configure the following parameters.
    • Policy Name: Enter a custom policy name, for example, Using Cloud Shell to log in to a running training container.
    • Policy View: Select Visual editor.
    • Policy Content: Select Allow, ModelArts Service, modelarts:trainJob:exec, and default resources.
      Figure 3 Creating a custom policy
      +
    +
  3. In the navigation pane, choose User Groups. Then, click Authorize in the Operation column of the target user group. On the Authorize User Group page, select the custom policies created in 2, and click Next. Then, select the scope and click OK.

    After the configuration, all users in the user group have the permission to use Cloud Shell to log in to a running training container.

    +

    If no user group is available, create one, add users to it through user group management, and configure authorization for the user group. If the target user is not in a user group, add the user to a user group through user group management.

    +
+
+

Using Cloud Shell

  1. Configure parameters based on Preparation: Assigning the Cloud Shell Permission to an IAM User.
  2. On the ModelArts console, choose Training Management > Training Jobs. Go to the details page of the target training job and log in to the training container on the Cloud Shell tab.

    Verify that the login is successful, as shown in the following figure.

    +
    Figure 4 Cloud Shell
    +
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/modelarts/best-practice/modelarts_24_0097.html b/docs/modelarts/best-practice/modelarts_24_0097.html new file mode 100644 index 00000000..138af980 --- /dev/null +++ b/docs/modelarts/best-practice/modelarts_24_0097.html @@ -0,0 +1,46 @@ + + +

Prohibiting a User from Using a Public Resource Pool

+

This section describes how to control the ModelArts permissions of a user so that the user is not allowed to use a public resource pool to create training jobs, create notebook instances, or deploy inference services.

+

Context

Through permission control, ModelArts dedicated resource pool users can be prohibited from using a public resource pool to create training jobs, create notebook instances, or deploy inference services.

+

To control the permissions, configure the following permission policy items:

+ +
+

Procedure

  1. Log in to the management console as a tenant user, hover the cursor over your username in the upper right corner, and choose Identity and Access Management from the drop-down list to switch to the IAM management console.
  2. In the navigation pane, choose Permissions > Policies/Roles. On the Policies/Roles page, click Create Custom Policy in the upper right corner, configure parameters, and click OK.
    • Policy Name: Configure the policy name.
    • Policy View: Select Visual editor or JSON.
    • Policy Content: Select Deny. In Select service, search for ModelArts and select it. In ReadWrite under Actions, search for modelarts:trainJob:create, modelarts:notebook:create, and modelarts:service:create and select them. All: Retain the default setting. In Add request condition, click Add Request Condition. In the displayed dialog box, set Condition Key to modelarts:poolType, Operator to StringEquals, and Value to public.
      The policy content in JSON view is as follows:
      {
      +    "Version": "1.1",
      +    "Statement": [
      +        {
      +            "Effect": "Deny",
      +            "Action": [
      +                "modelarts:trainJob:create",
      +                "modelarts:notebook:create",
      +                "modelarts:service:create"
      +            ],
      +            "Condition": {
      +                "StringEquals": {
      +                    "modelarts:poolType": [
      +                        "public"
      +                    ]
      +                }
      +            }
      +        }
      +    ]
      +}
      +
      +
    +
  3. In the navigation pane, choose User Groups. On the User Groups page, locate the row containing the target user group and click Authorize in the Operation column. On the Authorize User Group page, select the custom policy created in 2 and click Next. Then, select the scope and click OK.

    After the configuration, all users in the user group are prohibited from using a public resource pool to create training jobs, create notebook instances, or deploy inference services.

    +

    If no user group is available, create one, add users to it through user group management, and configure authorization for the user group. If the target user is not in a user group, add the user to a user group through user group management.

    +
  4. Add the policy to the user's agency authorization. This prevents the user from breaking the permission scope through a token on the tenant plane.

    In the navigation pane, choose Agencies. Locate the agency used by the user group on ModelArts and click Modify in the Operation column. On the Permissions tab page, click Authorize, select the created custom policy, and click Next. Select the scope for authorization and click OK.

    +
+
+

Verification

Log in to the ModelArts console as an IAM user, choose Training Management > Training Jobs, and click Create Training Job. On the page for creating a training job, only a dedicated resource pool can be selected for Resource Pool.

+

Log in to the ModelArts console as an IAM user, choose DevEnviron > Notebook, and click Create. On the page for creating a notebook instance, only a dedicated resource pool can be selected for Resource Pool.

+

Log in to the ModelArts console as an IAM user, choose Service Deployment > Real-Time Services, and click Deploy. On the page for service deployment, only a dedicated resource pool can be selected for Resource Pool.

+
+
+
+ +
+ diff --git a/docs/modelarts/best-practice/public_sys-resources/caution_3.0-en-us.png b/docs/modelarts/best-practice/public_sys-resources/caution_3.0-en-us.png new file mode 100644 index 00000000..60f60762 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/caution_3.0-en-us.png differ diff --git a/docs/modelarts/best-practice/public_sys-resources/danger_3.0-en-us.png b/docs/modelarts/best-practice/public_sys-resources/danger_3.0-en-us.png new file mode 100644 index 00000000..47a9c723 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/danger_3.0-en-us.png differ diff --git a/docs/modelarts/best-practice/public_sys-resources/delta.gif b/docs/modelarts/best-practice/public_sys-resources/delta.gif new file mode 100644 index 00000000..0d1b1f67 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/delta.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/deltaend.gif b/docs/modelarts/best-practice/public_sys-resources/deltaend.gif new file mode 100644 index 00000000..cc7da0fc Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/deltaend.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-arrowdn.gif b/docs/modelarts/best-practice/public_sys-resources/icon-arrowdn.gif new file mode 100644 index 00000000..37942803 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-arrowdn.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-arrowrt.gif b/docs/modelarts/best-practice/public_sys-resources/icon-arrowrt.gif new file mode 100644 index 00000000..6aaaa11c Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-arrowrt.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-caution.gif b/docs/modelarts/best-practice/public_sys-resources/icon-caution.gif new file mode 100644 index 00000000..079c79b2 Binary files 
/dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-caution.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-danger.gif b/docs/modelarts/best-practice/public_sys-resources/icon-danger.gif new file mode 100644 index 00000000..079c79b2 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-danger.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-huawei.gif b/docs/modelarts/best-practice/public_sys-resources/icon-huawei.gif new file mode 100644 index 00000000..a31d60f8 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-huawei.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-note.gif b/docs/modelarts/best-practice/public_sys-resources/icon-note.gif new file mode 100644 index 00000000..31be2b03 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-note.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-notice.gif b/docs/modelarts/best-practice/public_sys-resources/icon-notice.gif new file mode 100644 index 00000000..40907065 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-notice.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-tip.gif b/docs/modelarts/best-practice/public_sys-resources/icon-tip.gif new file mode 100644 index 00000000..c47bae05 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-tip.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/icon-warning.gif b/docs/modelarts/best-practice/public_sys-resources/icon-warning.gif new file mode 100644 index 00000000..079c79b2 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/icon-warning.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/imageclose.gif b/docs/modelarts/best-practice/public_sys-resources/imageclose.gif new 
file mode 100644 index 00000000..3a3344af Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/imageclose.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/imageclosehover.gif b/docs/modelarts/best-practice/public_sys-resources/imageclosehover.gif new file mode 100644 index 00000000..8699d5e3 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/imageclosehover.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/imagemax.gif b/docs/modelarts/best-practice/public_sys-resources/imagemax.gif new file mode 100644 index 00000000..99c07dc2 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/imagemax.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/imagemaxhover.gif b/docs/modelarts/best-practice/public_sys-resources/imagemaxhover.gif new file mode 100644 index 00000000..d01d77d6 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/imagemaxhover.gif differ diff --git a/docs/modelarts/best-practice/public_sys-resources/macFFBgHack.png b/docs/modelarts/best-practice/public_sys-resources/macFFBgHack.png new file mode 100644 index 00000000..ec811470 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/macFFBgHack.png differ diff --git a/docs/modelarts/best-practice/public_sys-resources/note_3.0-en-us.png b/docs/modelarts/best-practice/public_sys-resources/note_3.0-en-us.png new file mode 100644 index 00000000..57a0e1f5 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/note_3.0-en-us.png differ diff --git a/docs/modelarts/best-practice/public_sys-resources/notice_3.0-en-us.png b/docs/modelarts/best-practice/public_sys-resources/notice_3.0-en-us.png new file mode 100644 index 00000000..fa4b6499 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/notice_3.0-en-us.png differ diff --git 
a/docs/modelarts/best-practice/public_sys-resources/warning_3.0-en-us.png b/docs/modelarts/best-practice/public_sys-resources/warning_3.0-en-us.png new file mode 100644 index 00000000..def5c356 Binary files /dev/null and b/docs/modelarts/best-practice/public_sys-resources/warning_3.0-en-us.png differ