diff --git a/CMakeLists.txt b/CMakeLists.txt
index d25b9ec5..b12b124c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,6 +48,12 @@ install(FILES
     DESTINATION "${SIMDPP_PKGCONFIGDIR}"
 )
 
+install(FILES
+    "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
+    "${CMAKE_CURRENT_SOURCE_DIR}/CONTRIBUTING.md"
+    "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE_1_0.txt"
+    DESTINATION "${SIMDPP_DOCDIR}"
+)
 
 enable_testing()
 
diff --git a/doc/wiki/misc/for_each.mwiki b/doc/wiki/misc/for_each.mwiki
index 18f1a0b3..edf1519e 100644
--- a/doc/wiki/misc/for_each.mwiki
+++ b/doc/wiki/misc/for_each.mwiki
@@ -3,7 +3,7 @@
 {{dcl begin}}
 {{dcl |
 template<unsigned N, class V, class F>
-void for_each( const any_vec<N, V>& v, const F& function );
+void for_each( const any_vec<N, V>& v, F function );
 }}
 {{dcl end}}
 
diff --git a/doc/wiki/misc/insert.mwiki b/doc/wiki/misc/insert.mwiki
index f1d37091..c9431f0d 100644
--- a/doc/wiki/misc/insert.mwiki
+++ b/doc/wiki/misc/insert.mwiki
@@ -3,7 +3,7 @@
 {{dcl begin}}
 {{dcl |
 template< unsigned K, unsigned N, class E >
-Ret<N> extract( const Vec1<N,E>& a, const El& el );
+Ret<N> insert( const Vec1<N,E>& a, El el );
 }}
 {{dcl end}}
 
diff --git a/doc/wiki/misc/make_float.mwiki b/doc/wiki/misc/make_float.mwiki
index 2514a32f..e5ddb679 100644
--- a/doc/wiki/misc/make_float.mwiki
+++ b/doc/wiki/misc/make_float.mwiki
@@ -50,6 +50,8 @@ The first set of overloads loads to given non-mask vector type.
 
 The second set of overloads returns a value of unspecified type with which any non-mask vector can be initialized. E.g. {{c|1=uint32<4> x = make_float(1, 2, 3, 4);}}
 
+The {{ltf|misc/make_int}}, {{ltf|misc/make_uint}} and {{ltf|misc/make_float}} functions differ only in the types of their parameters. Separate functions exist to make coercion of literals of different types easier. Assigning e.g. {{c|make_int(2)}}, {{c|make_uint(2)}} or {{c|make_float(2)}} to a vector of any type results in the same behavior: all elements of the vector are set to 2. The bitwise representation of the resulting vector depends on the type of the vector.
+
 ===Parameters===
 {{par begin}}
 {{par | v0, ..., v15 | the values to initialize the vector to}}
@@ -83,4 +85,7 @@ r = [ v0 v1 .. v7 v0  ... v7 ]
 }}
 
 ===See also===
-{{todo}}
+{{dsc begin}}
+{{dsc inc | misc/dsc make_int }}
+{{dsc inc | misc/dsc make_uint }}
+{{dsc end}}
diff --git a/doc/wiki/misc/make_int.mwiki b/doc/wiki/misc/make_int.mwiki
index 7f324900..e4d9cf61 100644
--- a/doc/wiki/misc/make_int.mwiki
+++ b/doc/wiki/misc/make_int.mwiki
@@ -50,6 +50,8 @@ The first set of overloads loads to given non-mask vector type.
 
 The second set of overloads returns a value of unspecified type with which any non-mask vector can be initialized. E.g. {{c|1=int32<4> x = make_int(1, 2, 3, 4);}}
 
+The {{ltf|misc/make_int}}, {{ltf|misc/make_uint}} and {{ltf|misc/make_float}} functions differ only in the types of their parameters. Separate functions exist to make coercion of literals of different types easier. Assigning e.g. {{c|make_int(2)}}, {{c|make_uint(2)}} or {{c|make_float(2)}} to a vector of any type results in the same behavior: all elements of the vector are set to 2. The bitwise representation of the resulting vector depends on the type of the vector.
+
 ===Parameters===
 {{par begin}}
 {{par | v0, ..., v15 | the values to initialize the vector to}}
@@ -83,4 +85,7 @@ r = [ v0 v1 .. v7 v0  ... v7 ]
 }}
 
 ===See also===
-{{todo}}
+{{dsc begin}}
+{{dsc inc | misc/dsc make_uint }}
+{{dsc inc | misc/dsc make_float }}
+{{dsc end}}
diff --git a/doc/wiki/misc/make_uint.mwiki b/doc/wiki/misc/make_uint.mwiki
index 50747800..c2f2d7d4 100644
--- a/doc/wiki/misc/make_uint.mwiki
+++ b/doc/wiki/misc/make_uint.mwiki
@@ -50,6 +50,8 @@ The first set of overloads loads to given non-mask vector type.
 
 The second set of overloads returns a value of unspecified type with which any non-mask vector can be initialized. E.g. {{c|1=uint32<4> x = make_uint(1, 2, 3, 4);}}
 
+The {{ltf|misc/make_int}}, {{ltf|misc/make_uint}} and {{ltf|misc/make_float}} functions differ only in the types of their parameters. Separate functions exist to make coercion of literals of different types easier. Assigning e.g. {{c|make_int(2)}}, {{c|make_uint(2)}} or {{c|make_float(2)}} to a vector of any type results in the same behavior: all elements of the vector are set to 2. The bitwise representation of the resulting vector depends on the type of the vector.
+
 ===Parameters===
 {{par begin}}
 {{par | v0, ..., v15 | the values to initialize the vector to}}
@@ -83,4 +85,7 @@ r = [ v0 v1 .. v7 v0  ... v7 ]
 }}
 
 ===See also===
-{{todo}}
+{{dsc begin}}
+{{dsc inc | misc/dsc make_int }}
+{{dsc inc | misc/dsc make_float }}
+{{dsc end}}
diff --git a/doc/wiki/misc/test_bits_any.mwiki b/doc/wiki/misc/test_bits_any.mwiki
index 499078fd..366a31e1 100644
--- a/doc/wiki/misc/test_bits_any.mwiki
+++ b/doc/wiki/misc/test_bits_any.mwiki
@@ -19,7 +19,7 @@ Checks whether any bits are set in the given vector.
 
 ===Equivalent operation===
 {{source|1=
-r = (bool)(a0 & a1 & a2 & ... & aN)
+r = (bool)(a0 {{!}} a1 {{!}} a2 {{!}} ... {{!}} aN)
 }}
 
 ===See also===
diff --git a/simdpp/core/bit_or.h b/simdpp/core/bit_or.h
index 225419a4..59d967bf 100644
--- a/simdpp/core/bit_or.h
+++ b/simdpp/core/bit_or.h
@@ -40,74 +40,74 @@ typename detail::get_expr_bit_or<V1, V2>::type
 
 // support scalar arguments
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned, V>::type
+typename detail::get_expr_bit_or<unsigned, V>::type
         bit_or(const unsigned& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned long, V>::type
+typename detail::get_expr_bit_or<unsigned long, V>::type
         bit_or(const unsigned long& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned long long, V>::type
+typename detail::get_expr_bit_or<unsigned long long, V>::type
         bit_or(const unsigned long long& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, int, V>::type
+typename detail::get_expr_bit_or<int, V>::type
         bit_or(const int& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, long, V>::type
+typename detail::get_expr_bit_or<long, V>::type
         bit_or(const long& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, long long, V>::type
+typename detail::get_expr_bit_or<long long, V>::type
         bit_or(const long long& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned>::type
+typename detail::get_expr_bit_or<V, unsigned>::type
         bit_or(const any_vec<N,V>& a, const unsigned& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned long>::type
+typename detail::get_expr_bit_or<V, unsigned long>::type
         bit_or(const any_vec<N,V>& a, const unsigned long& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned long long>::type
+typename detail::get_expr_bit_or<V, unsigned long long>::type
         bit_or(const any_vec<N,V>& a, const unsigned long long& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, int>::type
+typename detail::get_expr_bit_or<V, int>::type
         bit_or(const any_vec<N,V>& a, const int& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, long>::type
+typename detail::get_expr_bit_or<V, long>::type
         bit_or(const any_vec<N,V>& a, const long& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, long long>::type
+typename detail::get_expr_bit_or<V, long long>::type
         bit_or(const any_vec<N,V>& a, const long long& b)
 {
     return { { a.wrapped(), b } };
diff --git a/simdpp/core/detail/get_expr_bitwise.h b/simdpp/core/detail/get_expr_bitwise.h
index 06c0cc99..58f18823 100644
--- a/simdpp/core/detail/get_expr_bitwise.h
+++ b/simdpp/core/detail/get_expr_bitwise.h
@@ -132,7 +132,7 @@ struct get_expr_bitwise2_and {
 */
 
 template<class V1, class V2>
-class get_expr_bit_or {
+struct get_expr_bitwise2_or_impl {
     using tags = expr2_maybe_scalar_tags<V1, V2>;
 
     // (size_tag) get the size tag of the resulting expression
@@ -146,27 +146,29 @@ class get_expr_bit_or {
                                     ? tags::v1_type_tag : tags::v2_type_tag;
     static const bool is_mask_op1 = type_tag_t1 == SIMDPP_TAG_MASK_INT ||
                                     type_tag_t1 == SIMDPP_TAG_MASK_FLOAT;
-    static const unsigned type_tag = (is_mask_op1 &&
-                                      tags::v1_size_tag != tags::v2_size_tag)
+    static const unsigned type_tag =
+            (is_mask_op1 && tags::v1_size_tag != tags::v2_size_tag)
                                     ? SIMDPP_TAG_UINT : type_tag_t1;
 
     // strip signed integer types
     static const unsigned v12_type_tag = type_tag == SIMDPP_TAG_INT
                                     ? SIMDPP_TAG_UINT : type_tag;
 
-
-public:
     using v1_final_type = typename type_of_tag<v12_type_tag + size_tag,
                                                tags::length_bytes, void>::type;
     using v2_final_type = typename type_of_tag<v12_type_tag + size_tag,
                                                tags::length_bytes, void>::type;
+};
 
-    using type = typename type_of_tag<type_tag + size_tag, tags::length_bytes,
+template<class V1, class V2>
+struct get_expr_bit_or {
+    using impl = get_expr_bitwise2_or_impl<V1, V2>;
+    using type = typename type_of_tag<impl::type_tag + impl::size_tag,
+                                      impl::tags::length_bytes,
                                       expr_bit_or<V1, V2>>::type;
 };
 
 
-
 } // namespace detail
 } // namespace SIMDPP_ARCH_NAMESPACE
 } // namespace simdpp
diff --git a/simdpp/detail/expr/bit_or.h b/simdpp/detail/expr/bit_or.h
index d429a1fc..b24efe6e 100644
--- a/simdpp/detail/expr/bit_or.h
+++ b/simdpp/detail/expr/bit_or.h
@@ -25,7 +25,7 @@ template<class R, class E1, class E2>
 struct expr_eval<R, expr_bit_or<E1, E2>> {
     static SIMDPP_INL R eval(const expr_bit_or<E1, E2>& e)
     {
-        using E = get_expr_bit_or<E1, E2>;
+        using E = get_expr_bitwise2_or_impl<E1, E2>;
         return (R) insn::i_bit_or(
                 eval_maybe_scalar_bitwise<typename E::v1_final_type, E1>::eval(e.a),
                 eval_maybe_scalar_bitwise<typename E::v2_final_type, E2>::eval(e.b));
diff --git a/simdpp/detail/insn/cmp_neq.h b/simdpp/detail/insn/cmp_neq.h
index 1cac69bc..70e7ea1f 100644
--- a/simdpp/detail/insn/cmp_neq.h
+++ b/simdpp/detail/insn/cmp_neq.h
@@ -192,7 +192,7 @@ mask_int64x2 i_cmp_neq(const uint64x2& a, const uint64x2& b)
     return bit_not(cmp_eq(a, b));
 #elif SIMDPP_USE_SSE2
     uint64x2 r32, r32s;
-    r32 = (uint32x4)cmp_eq(uint32x4(a), uint32x4(b));
+    r32 = cmp_eq(uint32x4(a), uint32x4(b));
     // swap the 32-bit halves
     r32s = bit_or(shift_l<32>(r32), shift_r<32>(r32));
     // combine the results. Each 32-bit half is ORed with the neighbouring pair
@@ -381,4 +381,3 @@ typename V::mask_vector_type i_cmp_neq(const V& a, const V& b)
 } // namespace simdpp
 
 #endif
-
diff --git a/simdpp/operators/bit_or.h b/simdpp/operators/bit_or.h
index 3c6ecc3d..d3a8f91a 100644
--- a/simdpp/operators/bit_or.h
+++ b/simdpp/operators/bit_or.h
@@ -41,74 +41,74 @@ typename detail::get_expr_bit_or<V1, V2>::type
 
 // support scalar arguments
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned, V>::type
+typename detail::get_expr_bit_or<unsigned, V>::type
         operator|(const unsigned& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned long, V>::type
+typename detail::get_expr_bit_or<unsigned long, V>::type
         operator|(const unsigned long& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned long long, V>::type
+typename detail::get_expr_bit_or<unsigned long long, V>::type
         operator|(const unsigned long long& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, int, V>::type
+typename detail::get_expr_bit_or<int, V>::type
         operator|(const int& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, long, V>::type
+typename detail::get_expr_bit_or<long, V>::type
         operator|(const long& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, long long, V>::type
+typename detail::get_expr_bit_or<long long, V>::type
         operator|(const long long& a, const any_vec<N,V>& b)
 {
     return { { a, b.wrapped() } };
 }
 
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned>::type
+typename detail::get_expr_bit_or<V, unsigned>::type
         operator|(const any_vec<N,V>& a, const unsigned& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned long>::type
+typename detail::get_expr_bit_or<V, unsigned long>::type
         operator|(const any_vec<N,V>& a, const unsigned long& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned long long>::type
+typename detail::get_expr_bit_or<V, unsigned long long>::type
         operator|(const any_vec<N,V>& a, const unsigned long long& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, int>::type
+typename detail::get_expr_bit_or<V, int>::type
         operator|(const any_vec<N,V>& a, const int& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, long>::type
+typename detail::get_expr_bit_or<V, long>::type
         operator|(const any_vec<N,V>& a, const long& b)
 {
     return { { a.wrapped(), b } };
 }
 template<unsigned N, class V> SIMDPP_INL
-typename detail::get_expr_bitwise2_and<expr_bit_or, V, long long>::type
+typename detail::get_expr_bit_or<V, long long>::type
         operator|(const any_vec<N,V>& a, const long long& b)
 {
     return { { a.wrapped(), b } };
diff --git a/test/insn/bitwise.cc b/test/insn/bitwise.cc
index 94ea1f51..8cedcb6d 100644
--- a/test/insn/bitwise.cc
+++ b/test/insn/bitwise.cc
@@ -118,6 +118,13 @@ void test_bitwise_n(TestResultsSet& tc, TestReporter& tr)
             TEST_PUSH(tc, float32_n, bit_andnot(v.f32[i], m.f32[j]));
             TEST_PUSH(tc, float64_n, bit_andnot(v.f64[i], m.f64[j]));
 
+            TEST_PUSH(tc, uint8_n,   bit_or(v.u8[i], m.u8[j]));
+            TEST_PUSH(tc, uint16_n,  bit_or(v.u16[i], m.u16[j]));
+            TEST_PUSH(tc, uint32_n,  bit_or(v.u32[i], m.u32[j]));
+            TEST_PUSH(tc, uint64_n,  bit_or(v.u64[i], m.u64[j]));
+            TEST_PUSH(tc, float32_n, bit_or(v.f32[i], m.f32[j]));
+            TEST_PUSH(tc, float64_n, bit_or(v.f64[i], m.f64[j]));
+
             // masks
             TEST_PUSH(tc, uint8_n,   bit_and(m.u8[i], m.u8[j]));
             TEST_PUSH(tc, uint16_n,  bit_and(m.u16[i], m.u16[j]));