<?php
// blob: 97d22f79ca23d3fc620ae779d80959e6152de05d (Copybara bot, be50d49, 2023-11-30 00:16:42 +0100)

/**
 * FixedBitNotation
 *
 * @author Andre DeMarre
 * @package FixedBitNotation
 */

/**
 * The FixedBitNotation class is for binary to text conversion. It
 * can handle many encoding schemes, formally defined or not, that
 * use a fixed number of bits to encode each character.
 *
 * Bits are consumed most-significant-bit first. With 6 bits per
 * character, the RFC 4648 alphabet, $rightPadFinalBits and
 * $padFinalGroup enabled, the output matches base64_encode().
 *
 * @package FixedBitNotation
 */
class FixedBitNotation
{
    /** @var string Alphabet; the character at offset N encodes value N */
    protected $_chars;

    /** @var integer Number of bits carried by one encoded character (1-8) */
    protected $_bitsPerCharacter;

    /** @var integer Number of alphabet characters in use (1 << bits) */
    protected $_radix;

    /** @var boolean Whether leftover final bits are left-aligned in the last character */
    protected $_rightPadFinalBits;

    /** @var boolean Whether encode() pads the output to a whole group */
    protected $_padFinalGroup;

    /** @var string Single padding character, or '' when none is usable */
    protected $_padCharacter;

    /** @var array|null Lazily built map of alphabet character => value */
    protected $_charmap;

    /**
     * Constructor
     *
     * Invalid arguments are normalised rather than rejected:
     * a missing or too short $chars falls back to a built-in 64
     * character alphabet, and $bitsPerCharacter is clamped to 1-8 and
     * then lowered until the alphabet has at least 2^bits characters.
     *
     * @param integer $bitsPerCharacter Bits to use for each encoded
     *                                  character
     * @param string $chars Base character alphabet
     * @param boolean $rightPadFinalBits How to encode last character
     * @param boolean $padFinalGroup Add padding to end of encoded
     *                               output
     * @param string $padCharacter Character to use for padding; only
     *                             the first character is used
     */
    public function __construct(
        $bitsPerCharacter, $chars = NULL, $rightPadFinalBits = FALSE,
        $padFinalGroup = FALSE, $padCharacter = '=')
    {
        // Ensure validity of $chars
        if (!is_string($chars) || ($charLength = strlen($chars)) < 2) {
            $chars =
                '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-,';
            $charLength = 64;
        }

        // Clamp to the supported range BEFORE any shifting. Shifting by
        // 63 or more bits yields a negative number or zero, which used
        // to bypass the alphabet-size check below.
        $bitsPerCharacter = (int) $bitsPerCharacter;

        if ($bitsPerCharacter < 1) {
            $bitsPerCharacter = 1;

        } elseif ($bitsPerCharacter > 8) {
            $bitsPerCharacter = 8;
        }

        // Lower $bitsPerCharacter to the greatest value the alphabet can
        // represent. $charLength >= 2, so this always stops at 1 or more.
        while ($bitsPerCharacter > 1
            && $charLength < (1 << $bitsPerCharacter)) {
            $bitsPerCharacter--;
        }

        $this->_chars = $chars;
        $this->_bitsPerCharacter = $bitsPerCharacter;
        $this->_radix = 1 << $bitsPerCharacter;
        $this->_rightPadFinalBits = $rightPadFinalBits;
        $this->_padFinalGroup = $padFinalGroup;

        // Reading offset 0 of an empty string raises a warning; an
        // empty or non-string pad simply results in no padding.
        $this->_padCharacter
            = (is_string($padCharacter) && $padCharacter !== '')
            ? $padCharacter[0]
            : '';
    }

    /**
     * Encode a string
     *
     * @param string $rawString Binary data to encode
     * @return string
     */
    public function encode($rawString)
    {
        $rawString = (string) $rawString;
        $byteCount = strlen($rawString);

        $chars = $this->_chars;
        $bitsPerCharacter = $this->_bitsPerCharacter;
        $valueMask = (1 << $bitsPerCharacter) - 1;

        $encodedString = '';

        // $buffer holds the bits not yet emitted, right-aligned;
        // $bufferBits is how many of them are valid. Reading bytes by
        // offset keeps this linear (array_shift() reindexes each call).
        $buffer = 0;
        $bufferBits = 0;

        for ($i = 0; $i < $byteCount; $i++) {
            $buffer = ($buffer << 8) | ord($rawString[$i]);
            $bufferBits += 8;

            // Emit every complete character now available
            while ($bufferBits >= $bitsPerCharacter) {
                $bufferBits -= $bitsPerCharacter;
                $encodedString .= $chars[($buffer >> $bufferBits) & $valueMask];
            }

            // Keep only the bits that have not been emitted yet
            $buffer &= (1 << $bufferBits) - 1;
        }

        if ($bufferBits > 0) {
            // Fewer than $bitsPerCharacter bits remain: emit one final
            // character, with the bits either left-aligned (zeros
            // appended on the right) or kept right-aligned.
            if ($this->_rightPadFinalBits) {
                $buffer <<= $bitsPerCharacter - $bufferBits;
            }

            $encodedString .= $chars[$buffer];

            if ($this->_padFinalGroup) {
                // Array of the lowest common multiples of
                // $bitsPerCharacter and 8, divided by 8
                $lcmMap = array(1 => 1, 2 => 1, 3 => 3, 4 => 1,
                                5 => 5, 6 => 3, 7 => 7, 8 => 1);
                $bytesPerGroup = $lcmMap[$bitsPerCharacter];

                // Characters in a full group minus the characters
                // produced for the trailing partial group
                $pads = (int) ($bytesPerGroup * 8 / $bitsPerCharacter
                    - ceil(($byteCount % $bytesPerGroup)
                    * 8 / $bitsPerCharacter));
                $encodedString .= str_repeat($this->_padCharacter, $pads);
            }
        }

        return $encodedString;
    }

    /**
     * Decode a string
     *
     * Trailing pad characters are always removed before decoding,
     * whether or not this instance adds padding when encoding.
     * Characters outside the alphabet are skipped unless $strict is set.
     *
     * @param string $encodedString Data to decode
     * @param boolean $caseSensitive When FALSE, a character missing from
     *                               the alphabet is retried in upper
     *                               and then lower case
     * @param boolean $strict Returns NULL if $encodedString contains
     *                        an undecodable character
     * @return string|NULL
     */
    public function decode($encodedString, $caseSensitive = TRUE,
        $strict = FALSE)
    {
        // Compare against '' explicitly: the string "0" is falsy in PHP
        // but is a perfectly valid encoded character.
        if (!is_string($encodedString) || $encodedString === '') {
            // Empty string, nothing to decode
            return '';
        }

        $chars = $this->_chars;
        $bitsPerCharacter = $this->_bitsPerCharacter;
        $radix = $this->_radix;
        $rightPadFinalBits = $this->_rightPadFinalBits;
        $padCharacter = $this->_padCharacter;

        // Get index of encoded characters
        if ($this->_charmap) {
            $charmap = $this->_charmap;

        } else {
            $charmap = array();

            for ($i = 0; $i < $radix; $i++) {
                $charmap[$chars[$i]] = $i;
            }

            $this->_charmap = $charmap;
        }

        // Remove trailing padding characters. rtrim() stops at the
        // start of the string, so input made only of padding no longer
        // reads offset -1.
        if ($padCharacter !== '') {
            $encodedString = rtrim($encodedString, $padCharacter);
        }

        // The last encoded character is $encodedString[$lastNotatedIndex]
        $lastNotatedIndex = strlen($encodedString) - 1;

        $rawString = '';
        $byte = 0;
        $bitsWritten = 0;

        // Convert each encoded character to a series of unencoded bits
        for ($c = 0; $c <= $lastNotatedIndex; $c++) {
            $char = $encodedString[$c];

            if (!isset($charmap[$char]) && !$caseSensitive) {
                // Encoded character was not found; try other case.
                // The alias is stored in the local copy of the map only.
                if (isset($charmap[$cUpper = strtoupper($char)])) {
                    $charmap[$char] = $charmap[$cUpper];

                } elseif (isset($charmap[$cLower = strtolower($char)])) {
                    $charmap[$char] = $charmap[$cLower];
                }
            }

            if (!isset($charmap[$char])) {
                if ($strict) {
                    // Unable to decode character; abort
                    return NULL;
                }

                continue;
            }

            $value = $charmap[$char];
            $bitsNeeded = 8 - $bitsWritten;
            $unusedBitCount = $bitsPerCharacter - $bitsNeeded;

            // Get the new bits ready
            if ($bitsNeeded > $bitsPerCharacter) {
                // New bits aren't enough to complete a byte; shift them
                // left into position
                $newBits = $value << ($bitsNeeded - $bitsPerCharacter);
                $bitsWritten += $bitsPerCharacter;

            } elseif ($c != $lastNotatedIndex || $rightPadFinalBits) {
                // Zero or more too many bits to complete a byte;
                // shift right
                $newBits = $value >> $unusedBitCount;
                $bitsWritten = 8;

            } else {
                // Final bits were stored right-aligned and don't need
                // to be shifted
                $newBits = $value;
                $bitsWritten = 8;
            }

            $byte |= $newBits;

            if ($bitsWritten == 8 || $c == $lastNotatedIndex) {
                // Byte is ready to be written
                $rawString .= pack('C', $byte);

                if ($c != $lastNotatedIndex) {
                    // Start the next byte with the bits of this
                    // character that did not fit into the previous one
                    $bitsWritten = $unusedBitCount;
                    $byte = ($value ^ ($newBits << $unusedBitCount))
                        << (8 - $bitsWritten);
                }
            }
        }

        return $rawString;
    }
}
276}