// OCRHelper.cs
using System;
using System.IO;
using System.Net;
using System.Text;
using Newtonsoft.Json;
using System.Configuration;
using System.Web;
namespace ParsePdf
{
/// <summary>
/// Sends image bytes to the Azure Computer Vision OCR endpoint and flattens the
/// JSON answer into plain text.
/// </summary>
internal class OcrHelper
{
    /// <summary>
    /// Posts <paramref name="fileBytes"/> to the OCR endpoint configured in app.config
    /// and returns the recognized text.
    /// </summary>
    /// <param name="fileBytes">Raw bytes of the image; sent as application/octet-stream.</param>
    /// <returns>
    /// The recognized words, space separated, one text line per output line; or
    /// <see cref="string.Empty"/> when the URL does not yield an HTTP request, the
    /// service answers with a status other than 200 OK, or the response has no body.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="fileBytes"/> is null.</exception>
    /// <remarks>
    /// Any exception raised while uploading the image or reading the response is
    /// written to the console and then rethrown unchanged.
    /// </remarks>
    public static string DoOcr(byte[] fileBytes)
    {
        // Fail before touching configuration or the network.
        if (fileBytes == null)
        {
            throw new ArgumentNullException("fileBytes");
        }

        // Cognitive vision API settings from app.config
        // See https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/home
        string endpoint = ConfigurationManager.AppSettings["AzureCognitiveApiEndpoint"];
        string apiKey = ConfigurationManager.AppSettings["AzureCognitiveApiKey"];

        // Request parameters. The collection URL-encodes keys and values when it is
        // converted to a string, so a key must not carry stray whitespace: a trailing
        // space would be sent as "detectOrientation+" and ignored by the service.
        var queryString = HttpUtility.ParseQueryString(string.Empty);
        queryString["language"] = "unk";
        queryString["detectOrientation"] = "true";

        var url = endpoint + "/vision/v1.0/ocr?" + queryString;
        var request = WebRequest.Create(url) as HttpWebRequest;
        if (request == null)
        {
            // Not an http/https URL, so there is nothing to send.
            return string.Empty;
        }

        request.Accept = "application/json";
        request.Headers.Add("Ocp-Apim-Subscription-Key", apiKey);
        request.ContentType = "application/octet-stream";
        request.Method = "POST";
        request.KeepAlive = true;
        request.ContentLength = fileBytes.Length;

        try
        {
            // Synchronous upload: this method is synchronous, so blocking on the async
            // variant only added AggregateException wrapping. The using block releases
            // the stream even when the write fails.
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(fileBytes, 0, fileBytes.Length);
            }

            using (var response = request.GetResponse() as HttpWebResponse)
            {
                if (response == null || response.StatusCode != HttpStatusCode.OK)
                {
                    return string.Empty;
                }

                using (var stream = response.GetResponseStream())
                {
                    if (stream == null)
                    {
                        return string.Empty;
                    }

                    using (var reader = new StreamReader(stream))
                    {
                        string outputJson = reader.ReadToEnd();
                        OcrResponseObject resp = JsonConvert.DeserializeObject<OcrResponseObject>(outputJson);

                        // An empty body deserializes to null.
                        return resp == null ? string.Empty : resp.ToString();
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine("Error with OCR request: " + ex.Message);
            throw;
        }
    }

    /// <summary>
    /// Root of the OCR JSON response. Field names mirror the JSON property names.
    /// </summary>
    public class OcrResponseObject
    {
        /// <summary>Value of the "language" property of the response.</summary>
        public string language;

        /// <summary>Value of the "textAngle" property, kept as the raw string.</summary>
        public string textAngle;

        /// <summary>Value of the "orientation" property of the response.</summary>
        public string orientation;

        /// <summary>Text regions found in the image; null when absent from the JSON.</summary>
        public Region[] regions;

        /// <summary>
        /// Flattens the response into text: every word is followed by a single space
        /// and every line is terminated with <see cref="Environment.NewLine"/>.
        /// </summary>
        /// <returns>
        /// The concatenated text; an empty string when there are no regions.
        /// Missing (null) regions, lines, words or collections are skipped rather
        /// than causing a <see cref="NullReferenceException"/>.
        /// </returns>
        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();
            if (regions == null)
            {
                return sb.ToString();
            }

            foreach (var region in regions)
            {
                if (region == null || region.lines == null)
                {
                    continue;
                }

                foreach (var line in region.lines)
                {
                    if (line == null)
                    {
                        continue;
                    }

                    // A line without a word list still produces its line break,
                    // exactly like a line with an empty word list.
                    if (line.words != null)
                    {
                        foreach (var word in line.words)
                        {
                            if (word == null)
                            {
                                continue;
                            }

                            sb.Append(word.text).Append(' ');
                        }
                    }

                    sb.AppendLine();
                }
            }

            return sb.ToString();
        }
    }

    /// <summary>One region of the OCR response: a group of text lines.</summary>
    public class Region
    {
        /// <summary>Value of the region's "boundingBox" property, kept as the raw string.</summary>
        public string boundingBox;

        /// <summary>Lines inside this region; null when absent from the JSON.</summary>
        public Line[] lines;
    }

    /// <summary>One line of the OCR response: a sequence of words.</summary>
    public class Line
    {
        /// <summary>Value of the line's "boundingBox" property, kept as the raw string.</summary>
        public string boundingBox;

        /// <summary>Words on this line; null when absent from the JSON.</summary>
        public Word[] words;
    }

    /// <summary>One recognized word of the OCR response.</summary>
    public class Word
    {
        /// <summary>Value of the word's "boundingBox" property, kept as the raw string.</summary>
        public string boundingBox;

        /// <summary>The recognized text of the word.</summary>
        public string text;
    }
}
}