From 13aa021d1d3de8ac0833add202f2c7004f68f245 Mon Sep 17 00:00:00 2001 From: "Su, Xiaomeng" Date: Wed, 26 Mar 2025 09:05:18 +0000 Subject: [PATCH] ocr_api_20250311 Reviewed-by: Pruthi, Vineet Co-authored-by: Su, Xiaomeng Co-committed-by: Su, Xiaomeng --- docs/ocr/api-ref/ALL_META.TXT.json | 54 +- docs/ocr/api-ref/CLASS.TXT.json | 40 +- docs/ocr/api-ref/ocr_03_0014.html | 2 + docs/ocr/api-ref/ocr_03_0029.html | 10 + docs/ocr/api-ref/ocr_03_0031.html | 2 +- docs/ocr/api-ref/ocr_03_0042.html | 21 +- docs/ocr/api-ref/ocr_03_0043.html | 2 +- docs/ocr/api-ref/ocr_03_0047.html | 5 + docs/ocr/api-ref/ocr_03_0060.html | 2 +- docs/ocr/api-ref/ocr_03_0062.html | 2 +- docs/ocr/api-ref/ocr_03_0063.html | 6 +- docs/ocr/api-ref/ocr_03_0161.html | 884 +++++++++++++++++++++++++++++ docs/ocr/api-ref/ocr_03_0162.html | 200 +++++++ 13 files changed, 1198 insertions(+), 32 deletions(-) create mode 100644 docs/ocr/api-ref/ocr_03_0161.html create mode 100644 docs/ocr/api-ref/ocr_03_0162.html diff --git a/docs/ocr/api-ref/ALL_META.TXT.json b/docs/ocr/api-ref/ALL_META.TXT.json index dee60c76b..7f05c13fc 100644 --- a/docs/ocr/api-ref/ALL_META.TXT.json +++ b/docs/ocr/api-ref/ALL_META.TXT.json @@ -61,7 +61,7 @@ "node_id":"ocr_03_0062.xml", "product_code":"ocr", "code":"4", - "des":"An endpoint is the request address used to call an API. Different services have different endpoints for different regions. You can query all service endpoints at Regions ", + "des":"An endpoint is the request address for calling an API. Endpoints vary depending on services and regions. 
For more information, see Regions and Endpoints.", "doc_type":"api", "kw":"Endpoint,Before You Start,API Reference", "search_title":"", @@ -81,7 +81,7 @@ "code":"5", "des":"Only images in PNG, JPG, JPEG, BMP, or TIFF format can be recognized.No side of the image can be smaller than 15 or larger than 8,192 pixels.The area to be recognized mus", "doc_type":"api", - "kw":"Constraints and Limitations,Before You Start,API Reference", + "kw":"Notes and Constraints,Before You Start,API Reference", "search_title":"", "metedata":[ { @@ -89,7 +89,7 @@ "prodname":"ocr" } ], - "title":"Constraints and Limitations", + "title":"Notes and Constraints", "githuburl":"" }, { @@ -151,7 +151,7 @@ "node_id":"ocr_03_0043.xml", "product_code":"ocr", "code":"9", - "des":"Log in to the OCR management console.Select a region based on your business needs. For details about the regions where services are deployed, see Regions and Endpoints.Se", + "des":"Log in to the OCR management console.Select a region based on service requirements. 
For details about the regions where services are deployed, see Regions and Endpoints.S", "doc_type":"api", "kw":"Subscribing to an OCR Service,API Calling,API Reference", "search_title":"", @@ -272,11 +272,47 @@ "title":"General Table", "githuburl":"" }, + { + "uri":"ocr_03_0161.html", + "node_id":"ocr_03_0161.xml", + "product_code":"ocr", + "code":"16", + "des":"This API recognizes text, analyzes layout, extracts key-value pairs, identifies tables in various formatted documents such as certificates, receipts, and forms, and conve", + "doc_type":"api", + "kw":"Smart Document Recognizer,API,API Reference", + "search_title":"", + "metedata":[ + { + "documenttype":"api", + "prodname":"ocr" + } + ], + "title":"Smart Document Recognizer", + "githuburl":"" + }, + { + "uri":"ocr_03_0162.html", + "node_id":"ocr_03_0162.xml", + "product_code":"ocr", + "code":"17", + "des":"This section describes how you can use Identity and Access Management (IAM) for fine-grained permissions management of your OCR resources. If your account does not need i", + "doc_type":"api", + "kw":"Permissions Policies and Supported Actions,API Reference", + "search_title":"", + "metedata":[ + { + "documenttype":"api", + "prodname":"ocr" + } + ], + "title":"Permissions Policies and Supported Actions", + "githuburl":"" + }, { "uri":"ocr_03_0048.html", "node_id":"ocr_03_0048.xml", "product_code":"ocr", - "code":"16", + "code":"18", "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", "doc_type":"api", "kw":"Common Parameters", @@ -294,7 +330,7 @@ "uri":"ocr_03_0090.html", "node_id":"ocr_03_0090.xml", "product_code":"ocr", - "code":"17", + "code":"19", "des":"An HTTP status code consists of three digits, which is classified into five categories: 1xx: related information; 2xx: operation successful; 3xx: redirection; 4xx: client", "doc_type":"api", "kw":"Status Codes,Common Parameters,API Reference", @@ -312,7 +348,7 @@ "uri":"ocr_03_0028.html", "node_id":"ocr_03_0028.xml", "product_code":"ocr", - "code":"18", + "code":"20", "des":"No data will be returned if an API fails to be called. You can locate the error cause based on the error code of each API. When an API call fails, HTTPS status code 4xx o", "doc_type":"api", "kw":"Error Codes,Common Parameters,API Reference", @@ -330,7 +366,7 @@ "uri":"ocr_03_0130.html", "node_id":"ocr_03_0130.xml", "product_code":"ocr", - "code":"19", + "code":"21", "des":"A project ID or project name is required in some API requests. You need to obtain the project ID and name before calling an API.Log in to the management console.In the up", "doc_type":"api", "kw":"Obtaining the Project ID,Common Parameters,API Reference", @@ -348,7 +384,7 @@ "uri":"ocr_03_0029.html", "node_id":"ocr_03_0029.xml", "product_code":"ocr", - "code":"20", + "code":"22", "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", "doc_type":"api", "kw":"Change History,API Reference", diff --git a/docs/ocr/api-ref/CLASS.TXT.json b/docs/ocr/api-ref/CLASS.TXT.json index 19e30760a..957d2f681 100644 --- a/docs/ocr/api-ref/CLASS.TXT.json +++ b/docs/ocr/api-ref/CLASS.TXT.json @@ -27,7 +27,7 @@ "code":"3" }, { - "desc":"An endpoint is the request address used to call an API. Different services have different endpoints for different regions. You can query all service endpoints at Regions ", + "desc":"An endpoint is the request address for calling an API. Endpoints vary depending on services and regions. For more information, see Regions and Endpoints.", "product_code":"ocr", "title":"Endpoint", "uri":"ocr_03_0062.html", @@ -38,7 +38,7 @@ { "desc":"Only images in PNG, JPG, JPEG, BMP, or TIFF format can be recognized.No side of the image can be smaller than 15 or larger than 8,192 pixels.The area to be recognized mus", "product_code":"ocr", - "title":"Constraints and Limitations", + "title":"Notes and Constraints", "uri":"ocr_03_0063.html", "doc_type":"api", "p_code":"1", @@ -72,7 +72,7 @@ "code":"8" }, { - "desc":"Log in to the OCR management console.Select a region based on your business needs. For details about the regions where services are deployed, see Regions and Endpoints.Se", + "desc":"Log in to the OCR management console.Select a region based on service requirements. 
For details about the regions where services are deployed, see Regions and Endpoints.S", "product_code":"ocr", "title":"Subscribing to an OCR Service", "uri":"ocr_03_0043.html", @@ -134,6 +134,24 @@ "p_code":"13", "code":"15" }, + { + "desc":"This API recognizes text, analyzes layout, extracts key-value pairs, identifies tables in various formatted documents such as certificates, receipts, and forms, and conve", + "product_code":"ocr", + "title":"Smart Document Recognizer", + "uri":"ocr_03_0161.html", + "doc_type":"api", + "p_code":"13", + "code":"16" + }, + { + "desc":"This section describes how you can use Identity and Access Management (IAM) for fine-grained permissions management of your OCR resources. If your account does not need i", + "product_code":"ocr", + "title":"Permissions Policies and Supported Actions", + "uri":"ocr_03_0162.html", + "doc_type":"api", + "p_code":"", + "code":"17" + }, { "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", "product_code":"ocr", @@ -141,7 +159,7 @@ "uri":"ocr_03_0048.html", "doc_type":"api", "p_code":"", - "code":"16" + "code":"18" }, { "desc":"An HTTP status code consists of three digits, which is classified into five categories: 1xx: related information; 2xx: operation successful; 3xx: redirection; 4xx: client", @@ -149,8 +167,8 @@ "title":"Status Codes", "uri":"ocr_03_0090.html", "doc_type":"api", - "p_code":"16", - "code":"17" + "p_code":"18", + "code":"19" }, { "desc":"No data will be returned if an API fails to be called. You can locate the error cause based on the error code of each API. 
When an API call fails, HTTPS status code 4xx o", @@ -158,8 +176,8 @@ "title":"Error Codes", "uri":"ocr_03_0028.html", "doc_type":"api", - "p_code":"16", - "code":"18" + "p_code":"18", + "code":"20" }, { "desc":"A project ID or project name is required in some API requests. You need to obtain the project ID and name before calling an API.Log in to the management console.In the up", @@ -167,8 +185,8 @@ "title":"Obtaining the Project ID", "uri":"ocr_03_0130.html", "doc_type":"api", - "p_code":"16", - "code":"19" + "p_code":"18", + "code":"21" }, { "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", @@ -177,6 +195,6 @@ "uri":"ocr_03_0029.html", "doc_type":"api", "p_code":"", - "code":"20" + "code":"22" } ] \ No newline at end of file diff --git a/docs/ocr/api-ref/ocr_03_0014.html b/docs/ocr/api-ref/ocr_03_0014.html index 84d0b49d1..7805bca0a 100644 --- a/docs/ocr/api-ref/ocr_03_0014.html +++ b/docs/ocr/api-ref/ocr_03_0014.html @@ -8,6 +8,8 @@ + diff --git a/docs/ocr/api-ref/ocr_03_0029.html b/docs/ocr/api-ref/ocr_03_0029.html index 418c3f635..0ed0d549f 100644 --- a/docs/ocr/api-ref/ocr_03_0029.html +++ b/docs/ocr/api-ref/ocr_03_0029.html @@ -13,6 +13,16 @@

This issue is the first official release.

+

2024-11-15

+ +

Changed the default value of the language parameter in the General Text OCR API. If this parameter is not specified, German and English are recognized by default.

+ + +

2025-03-04

+ +
  • Enhanced the functions of General Text OCR to support the recognition of images in PNG, JPG, JPEG, BMP, GIF, TIFF, WebP, PCX, ICO, PSD, or PDF format. Added the recognition of Khmer and Hebrew.
  • Added the Smart Document Recognizer API.
+ + diff --git a/docs/ocr/api-ref/ocr_03_0031.html b/docs/ocr/api-ref/ocr_03_0031.html index ec4daf771..3bd3444f7 100644 --- a/docs/ocr/api-ref/ocr_03_0031.html +++ b/docs/ocr/api-ref/ocr_03_0031.html @@ -96,7 +96,7 @@

String

-

Set either this parameter or image. Image URL. Currently, the following URLs are supported:

+

Set either this parameter or image. The image file has a size limit of 10 MB. The following image URLs are currently supported:

  • Public HTTP/HTTPS URL
  • URL provided by OBS.
NOTE:
  • The API response time depends on the image download time. If the image download takes a long time, the API call will fail.
  • Ensure that the storage service where the images to be detected reside is stable and reliable. OBS is recommended for storing image data.
  • The URL cannot contain Chinese characters. If Chinese characters exist, they must be encoded using UTF-8.
diff --git a/docs/ocr/api-ref/ocr_03_0042.html b/docs/ocr/api-ref/ocr_03_0042.html index 7cbc8d632..650132b7d 100644 --- a/docs/ocr/api-ref/ocr_03_0042.html +++ b/docs/ocr/api-ref/ocr_03_0042.html @@ -3,7 +3,7 @@

General Text

Function

This API detects and extracts text from images and converts the text and coordinates into JSON format. It can be used in various scenarios, such as scanned documents, electronic documents, books, receipts, and forms.

-

Constraints and Limitations

  • Only images in PNG, JPG, JPEG, BMP, GIF, or TIFF format can be recognized.
  • No side of the image can be smaller than 15 or larger than 8,192 pixels.
  • The area to be recognized must occupy more than 80% of the image. When scanning a table, ensure that all text and its surrounding area are included in the image.
  • An image can be rotated to any angle.
  • Text in images with complex backgrounds (such as outdoor scenery or anti-counterfeit watermarks) or distorted text cannot be recognized.
  • Supported languages: Chinese, English, some traditional Chinese, Malay, Ukrainian, Hindi, Russian, Vietnamese, Indonesian, Thai, Arabic, German, Latin, French, Italian, Spanish, Portuguese, Romanian, Polish Amharic, Japanese, Korean, Turkish, Norwegian, Danish, and Swedish.
+

Constraints and Limitations

  • Only images in PNG, JPG, JPEG, BMP, GIF, TIFF, WebP, PCX, ICO, PSD, or PDF format can be recognized.
  • No side of the image can be smaller than 15 or larger than 8,192 pixels.
  • The area to be recognized must occupy more than 80% of the image. When scanning a table, ensure that all text and its surrounding area are included in the image.
  • An image can be rotated to any angle.
  • Text in images with complex backgrounds (such as outdoor scenery or anti-counterfeit watermarks) or distorted text cannot be recognized.
  • Supported languages: Chinese, English, some traditional Chinese, Malay, Ukrainian, Hindi, Russian, Vietnamese, Indonesian, Thai, Arabic, German, Latin, French, Italian, Spanish, Portuguese, Romanian, Polish, Amharic, Japanese, Korean, Turkish, Norwegian, Danish, Swedish, Khmer, and Hebrew.

URI

POST /v2/{project_id}/ocr/general-text

@@ -96,7 +96,7 @@

String

-

Set either this parameter or image. Image URL. Currently, the following URLs are supported:

+

Set either this parameter or image. The image file has a size limit of 10 MB. The following image URLs are currently supported:

  • Public HTTP/HTTPS URL
  • URL provided by OBS.
NOTE:
  • The API response time depends on the image download time. If the image download takes a long time, the API call will fail.
  • Ensure that the storage service where the images to be detected reside is stable and reliable. OBS is recommended for storing image data.
  • The URL cannot contain Chinese characters. If Chinese characters exist, they must be encoded using UTF-8.
@@ -142,8 +142,8 @@

String

-

Language. If this parameter is not specified, Chinese and English will be used by default. The options are as follows:

-
  • auto: automatic language classification
  • ms: Malay
  • uk: Ukrainian
  • hi: Hindi
  • ru: Russian
  • vi: Vietnamese
  • id: Indonesian
  • th: Thai
  • zh: Chinese and English
  • ar: Arabic
  • de: German
  • la: Latin
  • fr: French
  • it: Italian
  • es: Spanish
  • pt: Portuguese
  • ro: Romanian
  • pl: Polish
  • am: Amharic
  • ja: Japanese
  • ko: Korean
  • tr: Turkish
  • no: Norwegian
  • da: Danish
  • sv: Swedish
+

Language. If this parameter is not specified, German and English will be used by default. The options are:

+
  • auto: automatic language classification
  • ms: Malay
  • uk: Ukrainian
  • hi: Hindi
  • ru: Russian
  • vi: Vietnamese
  • id: Indonesian
  • th: Thai
  • zh: Chinese and English
  • ar: Arabic
  • de: German
  • la: Latin
  • fr: French
  • it: Italian
  • es: Spanish
  • pt: Portuguese
  • ro: Romanian
  • pl: Polish
  • am: Amharic
  • ja: Japanese
  • ko: Korean
  • tr: Turkish
  • no: Norwegian
  • da: Danish
  • sv: Swedish
  • km: Khmer
  • he: Hebrew

single_orientation_mode

@@ -157,6 +157,15 @@

If this parameter is not specified, false is used by default. In this case, the fields in the image are recognized as in multiple directions by default.

+

pdf_page_number

+ +

No

+ +

Integer

+ +

Page number of the PDF file to be recognized. If this parameter is specified, only the content on the specified page is recognized. If it is not specified, the first page is recognized by default.

+ +
@@ -350,11 +359,11 @@ "direction" : 67.6506, "words_block_count" : 1, "words_block_list" : [ { - "words": "Word", + "words": "Word", "confidence" : 0.9999, "location" : [ [ 517, 447 ], [ 540, 504 ], [ 505, 518 ], [ 482, 461 ] ], "char_list" : [ { - "char": "Character", + "char": "Character", "char_location" : [ [ 517, 447 ], [ 530, 479 ], [ 495, 493 ], [ 482, 461 ] ], "char_confidence" : 0.9999 }, { diff --git a/docs/ocr/api-ref/ocr_03_0043.html b/docs/ocr/api-ref/ocr_03_0043.html index 5f3fce9f7..c94204063 100644 --- a/docs/ocr/api-ref/ocr_03_0043.html +++ b/docs/ocr/api-ref/ocr_03_0043.html @@ -1,7 +1,7 @@

Subscribing to an OCR Service

-
  1. Log in to the OCR management console.

    Select a region based on your business needs. For details about the regions where services are deployed, see Regions and Endpoints.

    +
    1. Log in to the OCR management console.

      Select a region based on service requirements. For details about the regions where services are deployed, see Regions and Endpoints.

    2. On the page displayed, select and subscribe to your desired APIs.
    diff --git a/docs/ocr/api-ref/ocr_03_0047.html b/docs/ocr/api-ref/ocr_03_0047.html index a6e18b592..140e67752 100644 --- a/docs/ocr/api-ref/ocr_03_0047.html +++ b/docs/ocr/api-ref/ocr_03_0047.html @@ -19,6 +19,11 @@

    This API detects and extracts text from images of general tables and converts the text into a structured format.

    +

    Smart Document Recognizer

    + +

    Recognizes text, analyzes layout, extracts key-value pairs, identifies tables in various formatted documents such as certificates, receipts, and forms, and converts the results into a structured JSON format.

    + +
    diff --git a/docs/ocr/api-ref/ocr_03_0060.html b/docs/ocr/api-ref/ocr_03_0060.html index 384e87af2..c0b4ae56c 100644 --- a/docs/ocr/api-ref/ocr_03_0060.html +++ b/docs/ocr/api-ref/ocr_03_0060.html @@ -10,7 +10,7 @@
  2. - diff --git a/docs/ocr/api-ref/ocr_03_0062.html b/docs/ocr/api-ref/ocr_03_0062.html index 369f45072..c7c0ae6f4 100644 --- a/docs/ocr/api-ref/ocr_03_0062.html +++ b/docs/ocr/api-ref/ocr_03_0062.html @@ -1,7 +1,7 @@

    Endpoint

    -

    An endpoint is the request address used to call an API. Different services have different endpoints for different regions. You can query all service endpoints at Regions and Endpoints.

    +

    An endpoint is the request address for calling an API. Endpoints vary depending on services and regions. For more information, see Regions and Endpoints.