This repository has been archived by the owner on Jul 7, 2023. It is now read-only.

Commit

Rm all uses of xrange and fix decode_length for slow_greedy_infer
PiperOrigin-RevId: 193717890
Ryan Sepassi committed Apr 20, 2018
1 parent 347084c commit 8cf5fa4
Showing 42 changed files with 220 additions and 202 deletions.
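
Most of the 42 files change in the same mechanical way: the Python 2-only builtin `xrange` is replaced by `six.moves.range`, which aliases the lazy `xrange` on Python 2 and the builtin `range` on Python 3. A minimal sketch of the compatibility idiom this commit standardizes on (not part of the diff itself):

```
# On Python 2 this aliases xrange (lazy); on Python 3 it is the builtin range.
from six.moves import range  # pylint: disable=redefined-builtin

total = 0
for i in range(5):  # iterates lazily on both Python versions
  total += i
assert total == 10
```
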
7 changes: 6 additions & 1 deletion .gitignore
@@ -1,6 +1,5 @@
# Compiled python modules.
*.pyc
*.DS_Store

# Byte-compiled
_pycache__/
@@ -17,3 +16,9 @@ dist/
# Sublime project files
*.sublime-project
*.sublime-workspace

# Tests
.pytest_cache/

# Other
*.DS_Store
2 changes: 1 addition & 1 deletion tensor2tensor/data_generators/README.md
@@ -47,7 +47,7 @@ with an integer denoting the length of the input list.

```
def length_generator(nbr_cases):
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
length = np.random.randint(100) + 1
yield {"inputs": [2] * length, "targets": [length]}
```
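
For context, the README snippet above is self-contained once `range` is the builtin; a runnable version with its import, plus a quick consistency check (the check is illustrative, not part of the README):

```
import numpy as np

def length_generator(nbr_cases):
  for _ in range(nbr_cases):
    length = np.random.randint(100) + 1
    yield {"inputs": [2] * length, "targets": [length]}

# Each case's target is the length of its input list.
for case in length_generator(3):
  assert case["targets"] == [len(case["inputs"])]
```
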
22 changes: 11 additions & 11 deletions tensor2tensor/data_generators/algorithmic.py
@@ -22,7 +22,7 @@

import numpy as np

from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin

from tensor2tensor.data_generators import generator_utils as utils
from tensor2tensor.data_generators import problem
@@ -113,9 +113,9 @@ def generator(self, nbr_symbols, max_length, nbr_cases):
A dictionary {"inputs": input-list, "targets": target-list} where
input-list and target-list are the same.
"""
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
l = np.random.randint(max_length) + 1
inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)]
inputs = [np.random.randint(nbr_symbols) for _ in range(l)]
yield {"inputs": inputs, "targets": inputs}


@@ -153,9 +153,9 @@ def generator(self, nbr_symbols, max_length, nbr_cases):
target-list[i] = input-list[i] + shift.
"""
shift = 10
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
l = np.random.randint(max_length) + 1
inputs = [np.random.randint(nbr_symbols - shift) for _ in xrange(l)]
inputs = [np.random.randint(nbr_symbols - shift) for _ in range(l)]
yield {"inputs": inputs, "targets": [i + shift for i in inputs]}

@property
@@ -187,9 +187,9 @@ def generator(self, nbr_symbols, max_length, nbr_cases):
A dictionary {"inputs": input-list, "targets": target-list} where
target-list is input-list reversed.
"""
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
l = np.random.randint(max_length) + 1
inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)]
inputs = [np.random.randint(nbr_symbols) for _ in range(l)]
yield {"inputs": inputs, "targets": list(reversed(inputs))}


@@ -265,7 +265,7 @@ def reverse_generator_nlplike(nbr_symbols,
"""
std_dev = max_length / scale_std_dev
distr_map = zipf_distribution(nbr_symbols, alpha)
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
l = int(abs(np.random.normal(loc=max_length / 2, scale=std_dev)) + 1)
inputs = zipf_random_sample(distr_map, l)
yield {"inputs": inputs, "targets": list(reversed(inputs))}
@@ -321,7 +321,7 @@ def random_number_lower_endian(length, base):
"""Helper function: generate a random number as a lower-endian digits list."""
if length == 1: # Last digit can be 0 only if length is 1.
return [np.random.randint(base)]
prefix = [np.random.randint(base) for _ in xrange(length - 1)]
prefix = [np.random.randint(base) for _ in range(length - 1)]
return prefix + [np.random.randint(base - 1) + 1] # Last digit is not 0.


@@ -354,7 +354,7 @@ def generator(self, base, max_length, nbr_cases):
"""
if max_length < 3:
raise ValueError("Maximum length must be at least 3.")
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
l1 = np.random.randint(max_length // 2) + 1
l2 = np.random.randint(max_length - l1 - 1) + 1
n1 = random_number_lower_endian(l1, base)
@@ -405,7 +405,7 @@ def generator(self, base, max_length, nbr_cases):
"""
if max_length < 3:
raise ValueError("Maximum length must be at least 3.")
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
l1 = np.random.randint(max_length // 2) + 1
l2 = np.random.randint(max_length - l1 - 1) + 1
n1 = random_number_lower_endian(l1, base)
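
The hunks in this file all follow the same generator shape; a standalone sketch of the identity-copy case (the name `identity_generator` is mine, the library wraps this in a Problem class):

```
import numpy as np

def identity_generator(nbr_symbols, max_length, nbr_cases):
  # Mirrors the pattern above: draw a random length, then sample that many
  # symbols; inputs and targets are identical for the copy task.
  for _ in range(nbr_cases):
    l = np.random.randint(max_length) + 1
    inputs = [np.random.randint(nbr_symbols) for _ in range(l)]
    yield {"inputs": inputs, "targets": inputs}
```
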
8 changes: 4 additions & 4 deletions tensor2tensor/data_generators/algorithmic_math.py
@@ -28,7 +28,7 @@
# Dependency imports

import six
from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin
import sympy


@@ -421,7 +421,7 @@ def math_dataset_init(alphabet_size=26, digits=None, functions=None):
raise ValueError("digits cannot must be between 1 and 10. Got %s." % digits)
vlist = alphabet[:alphabet_size]
if digits is not None:
dlist = [str(d) for d in xrange(digits)]
dlist = [str(d) for d in range(digits)]
else:
dlist = []
if functions is None:
@@ -481,7 +481,7 @@ def algebra_inverse(alphabet_size=26, min_depth=0, max_depth=2,
"Got max_depth=%s, min_depth=%s" % (max_depth, min_depth))

alg_cfg = math_dataset_init(alphabet_size)
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
sample, target = generate_algebra_inverse_sample(
alg_cfg.vlist,
list(alg_cfg.ops.values()), alg_cfg.solve_ops, min_depth, max_depth)
@@ -522,7 +522,7 @@ def algebra_simplify(alphabet_size=26,
"Got max_depth=%s, min_depth=%s" % (max_depth, min_depth))

alg_cfg = math_dataset_init(alphabet_size, digits=5)
for _ in xrange(nbr_cases):
for _ in range(nbr_cases):
sample, target = generate_algebra_simplify_sample(
alg_cfg.vlist, list(alg_cfg.ops.values()), min_depth, max_depth)
yield {
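
For reference, the `math_dataset_init` hunk builds the digit symbol list with `range`; the comprehension expands as:

```
digits = 5
dlist = [str(d) for d in range(digits)]
assert dlist == ["0", "1", "2", "3", "4"]
```
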
4 changes: 2 additions & 2 deletions tensor2tensor/data_generators/algorithmic_test.py
@@ -21,7 +21,7 @@

# Dependency imports

from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin

from tensor2tensor.data_generators import algorithmic

@@ -51,7 +51,7 @@ def testZipfDistribution(self):
# more probable/frequent that the second in rank, three times more prob/freq
# that the third in rank and so on.
d = algorithmic.zipf_distribution(10, 1.0001)
for i in xrange(len(d[1:])-1):
for i in range(len(d[1:])-1):
self.assertEqual("%.4f" % (abs(d[i+1]-d[i+2])*(i+2)), "%.4f" % d[1])

def testReverseGeneratorNlpLike(self):
4 changes: 2 additions & 2 deletions tensor2tensor/data_generators/cifar.py
@@ -100,10 +100,10 @@ def cifar_generator(cifar_version, tmp_dir, training, how_many, start_from=0):
num_images = images.shape[0]
images = images.reshape((num_images, 3, image_size, image_size))
all_images.extend([
np.squeeze(images[j]).transpose((1, 2, 0)) for j in xrange(num_images)
np.squeeze(images[j]).transpose((1, 2, 0)) for j in range(num_images)
])
labels = data[label_key]
all_labels.extend([labels[j] for j in xrange(num_images)])
all_labels.extend([labels[j] for j in range(num_images)])
return image_utils.image_generator(
all_images[start_from:start_from + how_many],
all_labels[start_from:start_from + how_many])
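
The transpose in this hunk converts CIFAR's channel-major layout to the height-width-channel layout the image generator expects; a small standalone check of what `transpose((1, 2, 0))` does:

```
import numpy as np

# Each CIFAR image arrives as a (3, 32, 32) channel-major array;
# transpose((1, 2, 0)) yields the (32, 32, 3) HWC layout.
chw = np.zeros((3, 32, 32), dtype=np.uint8)
hwc = chw.transpose((1, 2, 0))
assert hwc.shape == (32, 32, 3)
```
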
4 changes: 2 additions & 2 deletions tensor2tensor/data_generators/dna_encoder.py
@@ -26,7 +26,7 @@
import itertools
# Dependency imports

from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin
from tensor2tensor.data_generators import text_encoder


@@ -77,7 +77,7 @@ def encode(self, s):
assert (len(bases) % self._chunk_size) == 0
num_chunks = len(bases) // self._chunk_size
ids = []
for chunk_idx in xrange(num_chunks):
for chunk_idx in range(num_chunks):
start_idx = chunk_idx * self._chunk_size
end_idx = start_idx + self._chunk_size
chunk = tuple(bases[start_idx:end_idx])
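
The `encode` loop above splits the base sequence into fixed-size chunks and looks each chunk tuple up in a vocabulary; a simplified sketch with a hypothetical `vocab` dict (the real encoder builds and extends this table itself):

```
def chunk_ids(bases, chunk_size, vocab):
  # The sequence must divide evenly into chunks; each chunk tuple maps to
  # one id via the vocabulary.
  assert len(bases) % chunk_size == 0
  ids = []
  for chunk_idx in range(len(bases) // chunk_size):
    start_idx = chunk_idx * chunk_size
    chunk = tuple(bases[start_idx:start_idx + chunk_size])
    ids.append(vocab[chunk])
  return ids

# Example with a toy 2-mer vocabulary.
toy_vocab = {("A", "C"): 0, ("G", "T"): 1}
assert chunk_ids(["A", "C", "G", "T"], 2, toy_vocab) == [0, 1]
```
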
8 changes: 4 additions & 4 deletions tensor2tensor/data_generators/gene_expression.py
@@ -44,7 +44,7 @@
import h5py
import numpy as np

from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin

from tensor2tensor.data_generators import dna_encoder
from tensor2tensor.data_generators import generator_utils
@@ -130,7 +130,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1):
# Start and wait for processes in batches
num_batches = int(
math.ceil(float(len(processes)) / MAX_CONCURRENT_PROCESSES))
for i in xrange(num_batches):
for i in range(num_batches):
start = i * MAX_CONCURRENT_PROCESSES
end = start + MAX_CONCURRENT_PROCESSES
current = processes[start:end]
@@ -211,7 +211,7 @@ def generate_shard_args(outfiles, num_examples):
"""Generate start and end indices per outfile."""
num_shards = len(outfiles)
num_examples_per_shard = num_examples // num_shards
start_idxs = [i * num_examples_per_shard for i in xrange(num_shards)]
start_idxs = [i * num_examples_per_shard for i in range(num_shards)]
end_idxs = list(start_idxs)
end_idxs.pop(0)
end_idxs.append(num_examples)
@@ -249,7 +249,7 @@ def dataset_generator(filepath,
if end_idx is None:
end_idx = inp_data.len()

for i in xrange(start_idx, end_idx):
for i in range(start_idx, end_idx):
if i % 100 == 0:
print("Generating example %d for %s" % (i, dataset))
inputs, mask, outputs = inp_data[i], mask_data[i], out_data[i]
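
`generate_shard_args` computes per-shard start and end indices; a standalone sketch of that index arithmetic (the helper name `shard_ranges` is mine, not the library's):

```
def shard_ranges(num_examples, num_shards):
  # Even start indices per shard; the last shard absorbs any remainder,
  # matching the end_idxs bookkeeping in generate_shard_args above.
  per_shard = num_examples // num_shards
  starts = [i * per_shard for i in range(num_shards)]
  ends = starts[1:] + [num_examples]
  return list(zip(starts, ends))

assert shard_ranges(10, 3) == [(0, 3), (3, 6), (6, 10)]
```
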
6 changes: 3 additions & 3 deletions tensor2tensor/data_generators/generator_utils.py
@@ -29,7 +29,7 @@

import requests
import six
from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin
import six.moves.urllib_request as urllib # Imports urllib on Python2, urllib.request on Python3

from tensor2tensor.data_generators import text_encoder
@@ -119,7 +119,7 @@ def sharded_name(base_name, shard, total_shards):

def shard_filepath(fname, num_shards):
return [
sharded_name(fname, shard, num_shards) for shard in xrange(num_shards)
sharded_name(fname, shard, num_shards) for shard in range(num_shards)
]


@@ -592,7 +592,7 @@ def pack_examples(examples,
if chop_long_sequences and len(x) > packed_length:
assert not has_inputs
num_fragments = len(x) // packed_length
for i in xrange(num_fragments):
for i in range(num_fragments):
yield packer(
x[packed_length * i:packed_length * (i + 1)], spacing).to_dict()
x = x[packed_length * num_fragments:]
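
The `pack_examples` hunk chops an over-long sequence into `packed_length`-sized fragments before packing; a simplified sketch of just that chopping step (the function name is mine):

```
def chop_long_sequence(x, packed_length):
  # Split into consecutive full-size fragments and return the leftover tail,
  # as in the chop_long_sequences branch of pack_examples above.
  num_fragments = len(x) // packed_length
  fragments = [x[packed_length * i:packed_length * (i + 1)]
               for i in range(num_fragments)]
  return fragments, x[packed_length * num_fragments:]

assert chop_long_sequence(list("abcdefg"), 3) == (
    [["a", "b", "c"], ["d", "e", "f"]], ["g"])
```
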
4 changes: 2 additions & 2 deletions tensor2tensor/data_generators/lm1b.py
@@ -24,7 +24,7 @@

# Dependency imports

from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin

from tensor2tensor.data_generators import generator_utils
from tensor2tensor.data_generators import problem
@@ -79,7 +79,7 @@ def _train_data_filenames(tmp_dir):
os.path.join(tmp_dir,
"1-billion-word-language-modeling-benchmark-r13output",
"training-monolingual.tokenized.shuffled",
"news.en-%05d-of-00100" % i) for i in xrange(1, 100)
"news.en-%05d-of-00100" % i) for i in range(1, 100)
]


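
For reference, the training-file list above uses `range(1, 100)`, which expands to shards 00001 through 00099 (99 filenames), not 00000:

```
filenames = ["news.en-%05d-of-00100" % i for i in range(1, 100)]
assert filenames[0] == "news.en-00001-of-00100"
assert filenames[-1] == "news.en-00099-of-00100"
assert len(filenames) == 99
```
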
2 changes: 1 addition & 1 deletion tensor2tensor/data_generators/ocr.py
@@ -69,7 +69,7 @@ def generator(self, data_dir, tmp_dir, is_training):
num_examples = 2
ocr_dir = os.path.join(tmp_dir, "ocr/")
tf.logging.info("Looking for OCR data in %s." % ocr_dir)
for i in xrange(num_examples):
for i in range(num_examples):
image_filepath = os.path.join(ocr_dir, "%d.png" % i)
text_filepath = os.path.join(ocr_dir, "%d.txt" % i)
with tf.gfile.Open(text_filepath, "rb") as f:
14 changes: 7 additions & 7 deletions tensor2tensor/data_generators/text_encoder.py
@@ -34,7 +34,7 @@

import numpy as np
import six
from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin
from tensor2tensor.data_generators import tokenizer

import tensorflow as tf
@@ -385,7 +385,7 @@ def store_to_file(self, filename):
filename: Full path of the file to store the vocab to.
"""
with tf.gfile.Open(filename, "w") as f:
for i in xrange(len(self._id_to_token)):
for i in range(len(self._id_to_token)):
f.write(self._id_to_token[i] + "\n")


@@ -599,7 +599,7 @@ def _escaped_token_to_subtoken_strings(self, escaped_token):
start = 0
token_len = len(escaped_token)
while start < token_len:
for end in xrange(
for end in range(
min(token_len, start + self._max_subtoken_len), start, -1):
subtoken = escaped_token[start:end]
if subtoken in self._subtoken_string_to_id:
@@ -785,7 +785,7 @@ def build_from_token_counts(self,
# with high enough counts for our new vocabulary.
if min_count < 1:
min_count = 1
for i in xrange(num_iterations):
for i in range(num_iterations):
tf.logging.info("Iteration {0}".format(i))

# Collect all substrings of the encoded token that break along current
@@ -800,7 +800,7 @@
if max_subtoken_length is not None:
last_position = min(last_position, start + max_subtoken_length)

for end in xrange(start + 1, last_position):
for end in range(start + 1, last_position):
new_subtoken = escaped_token[start:end]
subtoken_counts[new_subtoken] += count
start += len(subtoken)
@@ -817,7 +817,7 @@
# Consider the candidates longest to shortest, so that if we accept
# a longer subtoken string, we can decrement the counts of its prefixes.
new_subtoken_strings = []
for lsub in xrange(len(len_to_subtoken_strings) - 1, 0, -1):
for lsub in range(len(len_to_subtoken_strings) - 1, 0, -1):
subtoken_strings = len_to_subtoken_strings[lsub]
for subtoken_string in subtoken_strings:
count = subtoken_counts[subtoken_string]
@@ -826,7 +826,7 @@
# explicitly, regardless of count.
if subtoken_string not in self._alphabet:
new_subtoken_strings.append((count, subtoken_string))
for l in xrange(1, lsub):
for l in range(1, lsub):
subtoken_counts[subtoken_string[:l]] -= count

# Include the alphabet explicitly to guarantee all strings are encodable.
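
The `_escaped_token_to_subtoken_strings` hunk above shows greedy longest-match segmentation against the subtoken vocabulary; a simplified sketch assuming a plain set as the vocabulary (the real class also handles escaping and guarantees every single character is covered):

```
def greedy_subtokens(token, vocab, max_subtoken_len):
  # Greedy longest-match segmentation, mirroring the loop above.
  out = []
  start = 0
  while start < len(token):
    for end in range(min(len(token), start + max_subtoken_len), start, -1):
      piece = token[start:end]
      if piece in vocab:
        out.append(piece)
        start = end
        break
    else:
      # Unreachable in the real encoder, where every single character is in
      # the vocabulary; here we skip the character to stay total.
      start += 1
  return out

assert greedy_subtokens("unrelated", {"un", "relate", "d", "r"}, 6) == [
    "un", "relate", "d"]
```
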
4 changes: 2 additions & 2 deletions tensor2tensor/data_generators/text_encoder_test.py
@@ -30,7 +30,7 @@
# Dependency imports
import mock
import six
from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin

from tensor2tensor.data_generators import text_encoder
import tensorflow as tf
@@ -193,7 +193,7 @@ def test_long_tokens(self):
long_tokens = []
for _ in range(num_tokens):
long_token = "".join([random.choice(string.ascii_uppercase)
for _ in xrange(token_length)])
for _ in range(token_length)])
long_tokens.append(long_token)

corpus = " ".join(long_tokens)
6 changes: 3 additions & 3 deletions tensor2tensor/data_generators/tokenizer.py
@@ -51,7 +51,7 @@
# Dependency imports

import six
from six.moves import xrange # pylint: disable=redefined-builtin
from six.moves import range # pylint: disable=redefined-builtin
import tensorflow as tf

# Conversion between Unicode and UTF-8, if required (on Python2)
@@ -60,7 +60,7 @@

# This set contains all letter and number characters.
_ALPHANUMERIC_CHAR_SET = set(
six.unichr(i) for i in xrange(sys.maxunicode)
six.unichr(i) for i in range(sys.maxunicode)
if (unicodedata.category(six.unichr(i)).startswith("L") or
unicodedata.category(six.unichr(i)).startswith("N")))

@@ -79,7 +79,7 @@ def encode(text):
token_start = 0
# Classify each character in the input string
is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text]
for pos in xrange(1, len(text)):
for pos in range(1, len(text)):
if is_alnum[pos] != is_alnum[pos - 1]:
token = text[token_start:pos]
if token != u" " or token_start == 0:
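
The tokenizer splits text on transitions between alphanumeric and non-alphanumeric characters, using the Unicode-wide character set built at module import; a simplified sketch of that split (unlike the real `encode`, it keeps every piece, including lone space tokens):

```
import sys
import unicodedata

import six

# All Unicode letter (L*) and number (N*) codepoints, as in the module above.
_ALPHANUMERIC_CHAR_SET = set(
    six.unichr(i) for i in range(sys.maxunicode)
    if unicodedata.category(six.unichr(i))[0] in ("L", "N"))

def split_on_category_change(text):
  # Emit a token every time the character class flips between alphanumeric
  # and non-alphanumeric, mirroring the loop in encode() above.
  tokens = []
  token_start = 0
  is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text]
  for pos in range(1, len(text)):
    if is_alnum[pos] != is_alnum[pos - 1]:
      tokens.append(text[token_start:pos])
      token_start = pos
  if text:
    tokens.append(text[token_start:])
  return tokens

assert split_on_category_change(u"Dude - that's so cool.") == [
    u"Dude", u" - ", u"that", u"'", u"s", u" ", u"so", u" ", u"cool", u"."]
```
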
(Diff truncated; the remaining 28 changed files are not shown.)
