Bugfix: --num-samples-per-shard must be int (#254)

* Bugfix: --num-samples-per-shard must be int
* bump version
awslabs · Dec 19, 2017 · 078070a · 078070a
1 parent 2e2ce80
commit 078070a
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 1 deletion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,10 @@ Note that Sockeye has checks in place to not translate with an old model that wa
 
 Each version section may have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_.
 
+## [1.15.7]
+### Fixed
+- Fixed a problem with the `--num-samples-per-shard` flag not being parsed as an int.
+
 ## [1.15.6]
 ### Added
  - New CLI `sockeye.prepare_data` for preprocessing the training data only once before training,

diff --git a/sockeye/__init__.py b/sockeye/__init__.py
@@ -11,4 +11,4 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.
 
-__version__ = '1.15.6'
+__version__ = '1.15.7'
diff --git a/sockeye/arguments.py b/sockeye/arguments.py
@@ -345,6 +345,7 @@ def add_prepare_data_cli_args(params):
     add_bucketing_args(params)
 
     params.add_argument('--num-samples-per-shard',
+                        type=int_greater_or_equal(1),
                         default=1000000,
                         help='The approximate number of samples per shard. Default: %(default)s.')
 

diff --git a/test/common.py b/test/common.py
@@ -203,6 +203,7 @@ def run_train_translate(train_params: str,
     :param dev_target_path: Path to the development target file.
     :param test_source_path: Path to the test source file.
     :param test_target_path: Path to the test target file.
+    :param use_prepared_data: Whether to use the prepared data functionality.
     :param max_seq_len: The maximum sequence length.
     :param restrict_lexicon: Additional translation run with top-k lexicon-based vocabulary restriction.
     :param work_dir: The directory to store the model and other outputs in.