@@ -4,15 +4,23 @@ use std::process;
4
4
5
5
fn main ( ) {
6
6
let args: Vec < String > = env:: args ( ) . collect ( ) ;
7
- let filename = match args. get ( 1 ) {
8
- Some ( f) => f,
7
+
8
+ parse_string ( match args. get ( 1 ) {
9
+ // read from file
10
+ Some ( filename) => std:: fs:: read_to_string ( filename) . unwrap ( ) ,
11
+
12
+ // read from stdin
9
13
None => {
10
- println ! ( "No arg provided" ) ;
11
- process:: exit ( 1 ) ;
14
+ let mut line = String :: new ( ) ;
15
+ std:: io:: stdin ( ) . read_line ( & mut line) . unwrap ( ) ;
16
+ line
12
17
}
13
- } ;
14
- let content = std:: fs:: read_to_string ( filename) . unwrap ( ) ;
18
+ } )
19
+ }
20
+
21
+ fn parse_string ( content : String ) {
15
22
let mut bytes = content. bytes ( ) ;
23
+
16
24
while let Some ( b) = bytes. next ( ) {
17
25
if b & 0x80 == 0 {
18
26
// if MSB is 0, it is ASCII.
@@ -23,42 +31,66 @@ fn main() {
23
31
std:: str :: from_utf8( & [ b] )
24
32
. unwrap( )
25
33
. replace( "\x0a " , "↵" )
26
- . replace( "\x20 " , "<space>" ) ,
34
+ . replace( "\x20 " , "<space>" )
35
+ . bold( ) ,
27
36
b
28
37
) ;
29
38
} else {
30
39
let nb = ( !b) . leading_zeros ( ) ;
31
- let bin_str = format ! ( "{:08b}" , b ) ;
32
- let ( first_bin , second_bin ) = bin_str . split_at ( nb as usize + 1 ) ;
40
+ let ( first , last ) = split_octet ( b , nb ) ;
41
+
33
42
print ! ( "{} " , "├" . bright_black( ) ) ;
34
- print ! ( "{}{}" , first_bin. magenta( ) , second_bin. green( ) , ) ;
35
- let mut v: Vec < u8 > = Vec :: new ( ) ;
43
+ print ! (
44
+ "{}{}" ,
45
+ format!( "{:04b}" , first) . magenta( ) ,
46
+ format!( "{:04b}" , last) . green( ) ,
47
+ ) ;
48
+
49
+ let mut v: Vec < u8 > = vec ! [ ] ;
36
50
let mut f: u32 = 0 ;
37
51
let base = 6 * ( nb - 1 ) ;
38
52
39
53
v. push ( b) ;
40
54
41
55
f |= ( b as u32 & ( u32:: pow ( 2 , 7 - nb) - 1 ) ) << base;
56
+
42
57
for i in 0 ..( nb - 1 ) {
43
58
let next_byte = bytes. next ( ) . unwrap ( ) ;
59
+
44
60
// if first 2 MSB are not '10', it's illegal sequence.
45
61
if next_byte & 0xC0 != 0x80 {
46
62
println ! ( "Illegal byte" ) ;
47
63
process:: exit ( 1 ) ;
48
64
}
49
- let bin_str = format ! ( "{:08b}" , next_byte) ;
50
- let ( first_bin, second_bin) = bin_str. split_at ( 2 ) ;
51
- print ! ( "{}{}" , first_bin. bright_black( ) , second_bin. green( ) ) ;
65
+
66
+ let ( first, last) = split_octet ( next_byte, 2 ) ;
67
+
68
+ print ! (
69
+ "{}{}" ,
70
+ format!( "{:02b}" , first) . bright_black( ) ,
71
+ format!( "{:06b}" , last) . green( )
72
+ ) ;
73
+
52
74
f |= ( next_byte as u32 & 0x3f ) << base - ( i + 1 ) * 6 ;
53
75
v. push ( next_byte) ;
54
76
}
77
+
55
78
println ! (
56
79
" ({} bytes) = {} {} (U+{:04X})" ,
57
80
nb,
58
81
format!( "{:0b}" , f) . cyan( ) ,
59
- std:: str :: from_utf8( & v[ ..] ) . unwrap( ) . bold( ) ,
82
+ std:: str :: from_utf8( & v[ ..] )
83
+ . unwrap_or( "[INVALID CODE]" )
84
+ . bold( ) ,
60
85
f
61
86
) ;
62
87
}
63
88
}
64
89
}
90
+
91
/// Splits an octet into its high and low bit groups.
///
/// `separate_at` is the number of most-significant bits that go into the
/// first element of the returned tuple; the remaining `8 - separate_at`
/// least-significant bits go into the second element. Both parts are
/// right-aligned, e.g. `split_octet(0b1011_0101, 2)` yields
/// `(0b10, 0b11_0101)`.
///
/// Values of `separate_at` greater than 8 are treated as 8, so the call
/// never overflows: `separate_at == 0` yields `(0, octet)` and
/// `separate_at >= 8` yields `(octet, 0)`.
fn split_octet(octet: u8, separate_at: u32) -> (u8, u8) {
    let high_bits = separate_at.min(8);
    let low_bits = 8 - high_bits;

    // Work in u16 so that a full-width mask (1 << 8) and a full-width shift
    // (>> 8) are both well defined.
    let mask = ((1u16 << low_bits) - 1) as u8;
    let first = (u16::from(octet) >> low_bits) as u8;
    let last = octet & mask;

    (first, last)
}
0 commit comments