-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathBase2n.php
302 lines (253 loc) · 10.9 KB
/
Base2n.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
<?php
/**
* Binary-to-text PHP Utilities
*
* @package binary-to-text-php
* @link https://github.com/ademarre/binary-to-text-php
* @author Andre DeMarre
* @copyright 2009-2013 Andre DeMarre
* @license http://opensource.org/licenses/MIT MIT
*/
/**
* Class for binary-to-text encoding with a base of 2^n
*
* The Base2n class is for binary-to-text conversion. It employs a
* generalization of the algorithms used by many encoding schemes that
* use a fixed number of bits to encode each character. In other words,
* the base is a power of 2.
*
* Earlier versions of this class were named
* FixedBitNotation and FixedBitEncoding.
*
* @package binary-to-text-php
*/
class Base2n
{
    /** @var string Alphabet; the character at offset i encodes the value i */
    protected $_chars;

    /** @var integer Number of bits represented by each encoded character (1-8) */
    protected $_bitsPerCharacter;

    /** @var integer Number of alphabet characters in use (2 ^ bits per character) */
    protected $_radix;

    /** @var boolean Whether the bits of the final character are shifted left (zero-filled on the right) */
    protected $_rightPadFinalBits;

    /** @var boolean Whether encoded output is padded to a whole group of characters */
    protected $_padFinalGroup;

    /** @var string Padding character (only meaningful when $_padFinalGroup is set) */
    protected $_padCharacter;

    /** @var boolean Whether decoding distinguishes upper and lower case */
    protected $_caseSensitive;

    /** @var array|null Lazily built map of alphabet character => value, used by decode() */
    protected $_charmap;

    /**
     * Constructor
     *
     * @param integer $bitsPerCharacter  Bits to use for each encoded character
     * @param string  $chars             Base character alphabet
     * @param boolean $caseSensitive     To decode in a case-sensitive manner
     * @param boolean $rightPadFinalBits How to encode last character
     * @param boolean $padFinalGroup     Add padding to end of encoded output
     * @param string  $padCharacter      Character to use for padding
     *
     * @throws InvalidArgumentException for incompatible parameters
     */
    public function __construct(
        $bitsPerCharacter,
        $chars = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_',
        $caseSensitive = TRUE, $rightPadFinalBits = FALSE,
        $padFinalGroup = FALSE, $padCharacter = '=')
    {
        // Ensure validity of $chars
        if (!is_string($chars) || ($charLength = strlen($chars)) < 2) {
            throw new InvalidArgumentException('$chars must be a string of at least two characters');
        }

        // Ensure validity of $padCharacter (only relevant when padding is enabled)
        if ($padFinalGroup) {
            if (!is_string($padCharacter) || !isset($padCharacter[0])) {
                throw new InvalidArgumentException('$padCharacter must be a string of one character');
            }

            if ($caseSensitive) {
                $padCharFound = strpos($chars, $padCharacter[0]);
            } else {
                $padCharFound = stripos($chars, $padCharacter[0]);
            }

            if ($padCharFound !== FALSE) {
                throw new InvalidArgumentException('$padCharacter can not be a member of $chars');
            }
        }

        // Ensure validity of $bitsPerCharacter
        if (!is_int($bitsPerCharacter)) {
            throw new InvalidArgumentException('$bitsPerCharacter must be an integer');
        }

        if ($bitsPerCharacter < 1) {
            // $bitsPerCharacter must be at least 1
            throw new InvalidArgumentException('$bitsPerCharacter can not be less than 1');
        } elseif ($charLength < 1 << $bitsPerCharacter) {
            // Character length of $chars is too small for $bitsPerCharacter
            // Find greatest acceptable value of $bitsPerCharacter for the error message
            $bitsPerCharacter = 1;
            $radix = 2;

            while ($charLength >= ($radix <<= 1) && $bitsPerCharacter < 8) {
                $bitsPerCharacter++;
            }

            $radix >>= 1;
            throw new InvalidArgumentException(
                '$bitsPerCharacter can not be more than ' . $bitsPerCharacter
                . ' given $chars length of ' . $charLength
                . ' (max radix ' . $radix . ')');
        } elseif ($bitsPerCharacter > 8) {
            // $bitsPerCharacter must not be greater than 8
            throw new InvalidArgumentException('$bitsPerCharacter can not be greater than 8');
        } else {
            $radix = 1 << $bitsPerCharacter;
        }

        $this->_chars = $chars;
        $this->_bitsPerCharacter = $bitsPerCharacter;
        $this->_radix = $radix;
        $this->_rightPadFinalBits = $rightPadFinalBits;
        $this->_padFinalGroup = $padFinalGroup;
        // $padCharacter is only validated when padding is enabled, so guard the
        // offset read instead of indexing a possibly empty or non-string value.
        $this->_padCharacter = (is_string($padCharacter) && isset($padCharacter[0]))
            ? $padCharacter[0]
            : '';
        $this->_caseSensitive = $caseSensitive;
    }

    /**
     * Encode a string
     *
     * Bytes are consumed most-significant bit first and emitted as alphabet
     * characters of $bitsPerCharacter bits each. If the input does not end on
     * a character boundary, the leftover bits form one final character; they
     * are shifted left (zero-filled on the right) when $rightPadFinalBits is
     * set, and used as-is otherwise. When $padFinalGroup is set, that final
     * group is then completed with the padding character.
     *
     * @param string $rawString Binary data to encode
     * @return string
     */
    public function encode($rawString)
    {
        // Unpack string into an array of bytes
        $bytes = unpack('C*', $rawString);

        if (!$bytes) {
            // Nothing to encode
            return '';
        }

        $chars = $this->_chars;
        $bitsPerCharacter = $this->_bitsPerCharacter;
        $characterMask = (1 << $bitsPerCharacter) - 1;

        $encodedString = '';
        $buffer = 0;        // Bits read from the input but not yet emitted
        $bufferedBits = 0;  // Number of meaningful low bits in $buffer

        // Single pass over the bytes; at most 15 bits are ever buffered
        foreach ($bytes as $byte) {
            $buffer = ($buffer << 8) | $byte;
            $bufferedBits += 8;

            // Emit every complete character currently available
            while ($bufferedBits >= $bitsPerCharacter) {
                $bufferedBits -= $bitsPerCharacter;
                $encodedString .= $chars[($buffer >> $bufferedBits) & $characterMask];
            }

            // Keep only the bits that have not been emitted yet
            $buffer &= (1 << $bufferedBits) - 1;
        }

        if ($bufferedBits > 0) {
            // Leftover bits; they become the final character
            if ($this->_rightPadFinalBits) {
                $buffer <<= $bitsPerCharacter - $bufferedBits;
            }

            $encodedString .= $chars[$buffer];

            if ($this->_padFinalGroup) {
                // Array of the lowest common multiples of $bitsPerCharacter and 8, divided by 8
                $lcmMap = array(1 => 1, 2 => 1, 3 => 3, 4 => 1, 5 => 5, 6 => 3, 7 => 7, 8 => 1);
                $bytesPerGroup = $lcmMap[$bitsPerCharacter];

                // Integer arithmetic: characters in a full group, minus the
                // characters the trailing partial group already produced
                $charsPerGroup = (int) (($bytesPerGroup * 8) / $bitsPerCharacter);
                $charsUsed = (int) ceil(((count($bytes) % $bytesPerGroup) * 8) / $bitsPerCharacter);

                $encodedString .= str_repeat($this->_padCharacter, $charsPerGroup - $charsUsed);
            }
        }

        return $encodedString;
    }

    /**
     * Decode a string
     *
     * Trailing padding characters are removed first when $padFinalGroup is
     * set. Characters that are not in the alphabet are skipped unless $strict
     * is set. Note that the final-character handling is tied to the last
     * position of the (unpadded) input, so a skipped character in that
     * position means no final-character handling takes place.
     *
     * @param string  $encodedString Data to decode
     * @param boolean $strict        Returns NULL if $encodedString contains an undecodable character
     * @return string|null Decoded data; '' for empty or non-string input
     */
    public function decode($encodedString, $strict = FALSE)
    {
        // Compare against '' explicitly: a loose truthiness test would also
        // reject the perfectly valid encoded string "0".
        if (!is_string($encodedString) || $encodedString === '') {
            // Empty string, nothing to decode
            return '';
        }

        $chars = $this->_chars;
        $bitsPerCharacter = $this->_bitsPerCharacter;
        $radix = $this->_radix;
        $rightPadFinalBits = $this->_rightPadFinalBits;
        $caseSensitive = $this->_caseSensitive;

        // Get index of encoded characters (built once, then cached)
        if ($this->_charmap) {
            $charmap = $this->_charmap;
        } else {
            $charmap = array();

            for ($i = 0; $i < $radix; $i++) {
                $charmap[$chars[$i]] = $i;
            }

            $this->_charmap = $charmap;
        }

        // Remove trailing padding characters; safe even if nothing else remains
        if ($this->_padFinalGroup) {
            $encodedString = rtrim($encodedString, $this->_padCharacter);
        }

        // The last encoded character is $encodedString[$lastNotatedIndex]
        // (-1 when the input consisted solely of padding)
        $lastNotatedIndex = strlen($encodedString) - 1;

        $rawString = '';
        $byte = 0;
        $bitsWritten = 0;

        // Convert each encoded character to a series of unencoded bits
        for ($c = 0; $c <= $lastNotatedIndex; $c++) {
            $char = $encodedString[$c];

            if (!$caseSensitive && !isset($charmap[$char])) {
                // Encoded character was not found; try other case.
                // The alias is stored in the local map only, not in the cache.
                $upper = strtoupper($char);
                $lower = strtolower($char);

                if (isset($charmap[$upper])) {
                    $charmap[$char] = $charmap[$upper];
                } elseif (isset($charmap[$lower])) {
                    $charmap[$char] = $charmap[$lower];
                }
            }

            if (!isset($charmap[$char])) {
                if ($strict) {
                    // Unable to decode character; abort
                    return NULL;
                }

                // Not strict: ignore the undecodable character
                continue;
            }

            $value = $charmap[$char];
            $isLast = ($c === $lastNotatedIndex);
            $bitsNeeded = 8 - $bitsWritten;
            $unusedBitCount = $bitsPerCharacter - $bitsNeeded;

            // Get the new bits ready
            if ($bitsNeeded > $bitsPerCharacter) {
                // New bits aren't enough to complete a byte; shift them left into position
                $newBits = $value << ($bitsNeeded - $bitsPerCharacter);
                $bitsWritten += $bitsPerCharacter;
            } elseif (!$isLast || $rightPadFinalBits) {
                // Zero or more too many bits to complete a byte; shift right
                $newBits = $value >> $unusedBitCount;
                $bitsWritten = 8;
            } else {
                // Final bits were encoded without right padding; no shift needed
                $newBits = $value;
                $bitsWritten = 8;
            }

            $byte |= $newBits;

            if ($bitsWritten === 8 || $isLast) {
                // Byte is ready to be written
                $rawString .= pack('C', $byte);

                if (!$isLast) {
                    // Start the next byte with the bits of this character that
                    // did not fit, moved to the most significant positions
                    $bitsWritten = $unusedBitCount;
                    $byte = ($value ^ ($newBits << $unusedBitCount)) << (8 - $bitsWritten);
                }
            }
        }

        return $rawString;
    }
}