Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
SimpleHTR
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Fabian Mersch
SimpleHTR
Commits
1cd108db
Commit
1cd108db
authored
1 year ago
by
fabian
Browse files
Options
Downloads
Patches
Plain Diff
get top n beams
parent
df06b7f5
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
data/Matrikelnummer.txt
+122
-0
122 additions, 0 deletions
data/Matrikelnummer.txt
src/model.py
+3
-1
3 additions, 1 deletion
src/model.py
src/webserver.py
+30
-2
30 additions, 2 deletions
src/webserver.py
with
155 additions
and
3 deletions
data/Matrikelnummer.txt
0 → 100644
+
122
−
0
View file @
1cd108db
2618131
5396159
4195500
2477916
1451975
1527582
1902738
1021949
3396397
5654451
2344214
1165119
4673022
1231797
847962
3185249
3612026
3561847
9826007
2182311
4361295
2880477
8649586
4733755
2894201
6395638
7236398
8746104
7684872
2938895
5181005
6171182
8651985
5875557
7681752
6336668
4187880
7260199
1167683
9091573
2918857
9216329
4164654
8594589
3159466
7604001
3421741
7254079
1270444
5645253
980528
708661
8328098
8221551
3148516
1800021
5571110
2855211
7828272
6925189
7942598
624266
3187881
3256903
8196660
6132367
9453755
6611707
9613058
9930906
6200765
6558831
4527000
2344683
8898890
9120266
1878736
4611990
31286
7712537
5559006
4883443
6825506
2099459
7323188
4974718
1906938
4541044
9647493
1634307
9728419
5011573
2060543
3382411
6996707
1444539
3305362
2574220
4196813
3169919
515304
8923309
7560572
660534
948191
434391
5246351
4696889
5771826
2645612
1035896
2163194
4594761
2139089
6826880
8984885
8653828
3789292
6079828
5337008
4980605
\ No newline at end of file
This diff is collapsed.
Click to expand it.
src/model.py
+
3
−
1
View file @
1cd108db
...
...
@@ -195,7 +195,9 @@ class Model:
# word beam search: already contains label strings
if
self
.
decoder_type
==
DecoderType
.
WordBeamSearch
:
label_strs
=
ctc_output
label_strs
=
ctc_output
[
0
]
for
x
in
ctc_output
:
print
([
''
.
join
([
self
.
char_list
[
c
]
for
c
in
labelStr
])
for
labelStr
in
x
])
# TF decoders: label strings are contained in sparse tensor
else
:
...
...
This diff is collapsed.
Click to expand it.
src/webserver.py
+
30
−
2
View file @
1cd108db
...
...
@@ -43,9 +43,22 @@ def predictNach():
model_name
.
decoder
=
WordBeamSearch
(
50
,
'
Words
'
,
0.0
,
corpus
.
encode
(
'
utf8
'
),
chars
.
encode
(
'
utf8
'
),
word_chars
.
encode
(
'
utf8
'
))
recognized
,
probability
=
htr_model
.
Model
.
infer_batch
(
model_name
,
batch
)
#convert corpus to list, split at space
corpus
=
corpus
.
split
()
result_list
=
[]
for
name
in
recognized
:
indecies
=
[]
for
i
in
range
(
len
(
corpus
)):
if
name
==
corpus
[
i
]:
indecies
.
append
(
i
)
if
len
(
indecies
)
==
0
:
indecies
.
append
(
-
1
)
result_list
.
append
((
name
,
indecies
))
result
=
{
'
recognized
'
:
re
cognized
[
0
]
,
'
recognized
'
:
re
sult_list
,
}
return
jsonify
(
result
)
...
...
@@ -59,6 +72,7 @@ def predictVor():
image_array
=
image_array
[:
-
2
]
image_array
=
image_array
.
reshape
((
h
,
w
))
preprocessor
=
htr_preprocessor
.
Preprocessor
(
htr
.
get_img_size
(),
dynamic_width
=
True
,
padding
=
16
)
print
(
image_array
.
shape
)
processed_image
=
preprocessor
.
process_img
(
image_array
)
batch
=
htr_data_loader
.
Batch
([
processed_image
],
None
,
1
)
...
...
@@ -69,9 +83,23 @@ def predictVor():
word_chars
.
encode
(
'
utf8
'
))
recognized
,
probability
=
htr_model
.
Model
.
infer_batch
(
model_name
,
batch
)
#convert corpus to list, split at space
corpus
=
corpus
.
split
()
result_list
=
[]
for
name
in
recognized
:
indecies
=
[]
for
i
in
range
(
len
(
corpus
)):
if
name
==
corpus
[
i
]:
indecies
.
append
(
i
)
if
len
(
indecies
)
==
0
:
indecies
.
append
(
-
1
)
result_list
.
append
((
name
,
indecies
))
result
=
{
'
recognized
'
:
re
cognized
[
0
]
,
'
recognized
'
:
re
sult_list
,
}
return
jsonify
(
result
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment