From 3e42434f652e4df57356418bf95e5e4855f1488d Mon Sep 17 00:00:00 2001
From: tomasnykodym
Date: Mon, 22 Aug 2016 11:44:16 -0700
Subject: [PATCH] PUBDEV-1612: orc parser. (#93)

Built on top of Michal M's avro-parser, using Tomas's parser setup with
modifications to support ORC parsing. Additional tests were added by Nidhi,
who fixed the R unit test on HDFS. Extra help from Jeff G was needed on a
lot of setup issues.

Includes:
- Added corresponding HDFS tests from pyunit tests.
- Removed bad tests.
---
 build.gradle | 4 +-
 gradle.properties | 16 +-
 h2o-app/build.gradle | 2 +
 h2o-assembly/build.gradle | 3 +
 .../java/water/api/ParseSetupHandler.java | 16 +-
 .../src/main/java/water/fvec/FileVec.java | 23 +-
 .../water/parser/DefaultParserProviders.java | 17 +-
 .../java/water/parser/FVecParseWriter.java | 5 +-
 .../src/main/java/water/parser/ORCParser.java | 7 -
 .../main/java/water/parser/ParseDataset.java | 47 +-
 .../main/java/water/parser/ParseSetup.java | 43 +-
 .../java/water/parser/ParserProvider.java | 22 +-
 .../rapids/ast/prims/mungers/AstFlatten.java | 6 +-
 h2o-core/src/main/java/water/util/Log.java | 13 +-
 h2o-core/src/test/java/water/TestUtil.java | 81 +++
 .../test/java/water/parser/ParserTest2.java | 18 +-
 .../test/java/water/rapids/GroupingBench.java | 2 +-
 h2o-hadoop/assemblyjar.gradle | 12 +
 h2o-hadoop/h2o-cdh5.2-assembly/build.gradle | 2 +
 h2o-hadoop/h2o-cdh5.2/build.gradle | 1 +
 h2o-hadoop/h2o-cdh5.3-assembly/build.gradle | 2 +
 h2o-hadoop/h2o-cdh5.4.2-assembly/build.gradle | 6 +-
 h2o-hadoop/h2o-cdh5.5.3-assembly/build.gradle | 4 +-
 h2o-hadoop/h2o-cdh5.6.0-assembly/build.gradle | 2 +
 h2o-hadoop/h2o-cdh5.7.0-assembly/build.gradle | 2 +
 h2o-hadoop/h2o-hdp2.1-assembly/build.gradle | 3 +
 h2o-hadoop/h2o-hdp2.2-assembly/build.gradle | 2 +
 h2o-hadoop/h2o-hdp2.3-assembly/build.gradle | 3 +
 h2o-hadoop/h2o-hdp2.4-assembly/build.gradle | 2 +
 .../h2o-mapr3.1.1-assembly/build.gradle | 1 +
 .../h2o-mapr4.0.1-assembly/build.gradle | 1 +
 h2o-hadoop/h2o-mapr5.0-assembly/build.gradle | 1 +
 h2o-hadoop/h2o-mapr5.1-assembly/build.gradle | 1 +
 .../water/parser/avro/AvroParserProvider.java | 4 +-
 h2o-parsers/h2o-orc-parser/build.gradle | 38 ++
 .../main/java/water/parser/orc/OrcParser.java | 614 ++++++++++++++++++
 .../water/parser/orc/OrcParserProvider.java | 138 ++++
 .../main/java/water/parser/orc/OrcUtil.java | 72 ++
 .../services/water.parser.ParserProvider | 1 +
 .../water/parser/ParseTestMultiFileOrc.java | 55 ++
 .../java/water/parser/ParseTestORCCSV.java | 78 +++
 .../test/java/water/parser/ParseTestOrc.java | 434 +++++++++++++
 h2o-parsers/h2o-orc-parser/testMultiNode.sh | 124 ++++
 h2o-persist-hdfs/build.gradle | 16 +-
 .../main/java/water/persist/PersistHdfs.java | 22 +-
 h2o-py/h2o/expr.py | 3 +-
 h2o-py/tests/pyunit_utils/utilsPY.py | 247 +++++++
 h2o-py/tests/testdir_hdfs/index.list | 7 +
 .../pyunit_INTERNAL_HDFS_airlines_orc.py | 81 +++
 .../pyunit_INTERNAL_HDFS_baddata_orc.py | 44 ++
 ...NTERNAL_HDFS_hexdev_29_import_types_orc.py | 45 ++
 ...HDFS_import_folder_airline_05_orc_large.py | 73 +++
 .../pyunit_INTERNAL_HDFS_import_folder_orc.py | 46 ++
 ...nit_INTERNAL_HDFS_iris_import_types_orc.py | 44 ++
 ...pyunit_INTERNAL_HDFS_milsongs_orc_large.py | 45 ++
 .../pyunit_INTERNAL_HDFS_orc_parser.py | 60 ++
 .../pyunit_INTERNAL_HDFS_prostate_orc.py | 48 ++
 ...pyunit_INTERNAL_HDFS_timestamp_date_orc.py | 57 ++
 .../pyunit_NOFEATURE_orc_parser.py | 53 ++
 .../pyunit_NOFEATURE_orc_parser_baddata.py | 27 +
 ...ATURE_orc_parser_hexdev_29_import_types.py | 30 +
 ...unit_NOFEATURE_orc_parser_import_folder.py | 30 +
..._parser_import_folder_airline_05p_large.py | 61 ++ ...orc_parser_import_folder_milsongs_large.py | 30 + ..._NOFEATURE_orc_parser_iris_import_types.py | 30 + .../pyunit_NOFEATURE_orc_parser_prostate.py | 33 + ...nit_orc_NOFEATURE_parser_timestamp_date.py | 49 ++ .../runit_INTERNAL_HDFS_airlines_orc.R | 67 ++ .../runit_NOFEATURE_orc_parser.R | 40 ++ ..._NOFEATURE_orc_parser_airlines_05p_large.R | 46 ++ .../runit_NOFEATURE_orc_parser_baddata.R | 28 + ...unit_orc_NOFEATURE_parser_milsongs_large.R | 33 + scripts/run.py | 2 +- scripts/saveTableAsOrc.textile | 161 ++++- settings.gradle | 12 +- 75 files changed, 3391 insertions(+), 107 deletions(-) delete mode 100644 h2o-core/src/main/java/water/parser/ORCParser.java create mode 100644 h2o-parsers/h2o-orc-parser/build.gradle create mode 100644 h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcParser.java create mode 100644 h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcParserProvider.java create mode 100644 h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcUtil.java create mode 100644 h2o-parsers/h2o-orc-parser/src/main/resources/META-INF/services/water.parser.ParserProvider create mode 100644 h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestMultiFileOrc.java create mode 100644 h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestORCCSV.java create mode 100644 h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestOrc.java create mode 100755 h2o-parsers/h2o-orc-parser/testMultiNode.sh create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_airlines_orc.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_baddata_orc.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_hexdev_29_import_types_orc.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_import_folder_airline_05_orc_large.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_import_folder_orc.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_iris_import_types_orc.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_milsongs_orc_large.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_orc_parser.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_prostate_orc.py create mode 100644 h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_timestamp_date_orc.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_baddata.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_hexdev_29_import_types.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder_airline_05p_large.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder_milsongs_large.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_iris_import_types.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_prostate.py create mode 100644 h2o-py/tests/testdir_parser/pyunit_orc_NOFEATURE_parser_timestamp_date.py create mode 100644 h2o-r/tests/testdir_hdfs/runit_INTERNAL_HDFS_airlines_orc.R create mode 100644 h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser.R create mode 100644 h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser_airlines_05p_large.R create mode 100644 
h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser_baddata.R create mode 100644 h2o-r/tests/testdir_parser/runit_orc_NOFEATURE_parser_milsongs_large.R diff --git a/build.gradle b/build.gradle index 77694e827f69..f44ad8a7d4f0 100644 --- a/build.gradle +++ b/build.gradle @@ -54,7 +54,8 @@ ext { project(':h2o-persist-s3'), project(':h2o-genmodel'), project(':h2o-bindings'), - project(':h2o-avro-parser') + project(':h2o-avro-parser'), + project(':h2o-orc-parser'), ] javaProjects = [ @@ -69,6 +70,7 @@ ext { project(':h2o-genmodel'), project(':h2o-bindings'), project(':h2o-avro-parser'), + project(':h2o-orc-parser'), ] scalaProjects = [ diff --git a/gradle.properties b/gradle.properties index 3dd45ffef709..787033053579 100644 --- a/gradle.properties +++ b/gradle.properties @@ -10,5 +10,19 @@ doJava6Bytecode=auto # Run animal sniffer - by default false, but if java6 bytecode is requested # then animal sniffer is run doAnimalSniffer=false -# Increase PermGen size for build +# The flag to include ORC support inside default h2o.jar. +# WARNING: this will upgrade default Hadoop client version to one supporting ORC +doIncludeOrc=false +# +# Version of hadoop dependency which is used for jUnit test execution +# +orcDefaultHadoopClientVersion=2.6.0-cdh5.4.0 +orcDefaultHiveExecVersion=1.1.0-cdh5.4.0 +# +# Default hadoop client version +# +defaultHadoopClientVersion=2.0.0-cdh4.3.0 +# +# Gradle arguments +# org.gradle.jvmargs='-XX:MaxPermSize=384m' diff --git a/h2o-app/build.gradle b/h2o-app/build.gradle index a5c985d4ad66..263a15f3d069 100644 --- a/h2o-app/build.gradle +++ b/h2o-app/build.gradle @@ -9,5 +9,7 @@ dependencies { compile project(":h2o-core") compile project(":h2o-genmodel") compile project(":h2o-avro-parser") + // Note: orc parser is included at the assembly level for each + // Hadoop distribution } diff --git a/h2o-assembly/build.gradle b/h2o-assembly/build.gradle index 81318cbb6af3..20830c8b583a 100644 --- a/h2o-assembly/build.gradle +++ b/h2o-assembly/build.gradle @@ -7,6 +7,9 @@ dependencies { compile project(":h2o-app") compile project(":h2o-persist-s3") compile project(":h2o-persist-hdfs") + if (project.hasProperty("doIncludeOrc") && project.doIncludeOrc == "true") { + compile project(":h2o-orc-parser") + } compile "org.slf4j:slf4j-log4j12:1.7.5" } diff --git a/h2o-core/src/main/java/water/api/ParseSetupHandler.java b/h2o-core/src/main/java/water/api/ParseSetupHandler.java index 4abc9dffc2b5..b6124bcae226 100644 --- a/h2o-core/src/main/java/water/api/ParseSetupHandler.java +++ b/h2o-core/src/main/java/water/api/ParseSetupHandler.java @@ -9,7 +9,9 @@ import water.Key; import water.api.schemas3.ParseSetupV3; import water.exceptions.H2OIllegalArgumentException; +import water.parser.ParseDataset; import water.parser.ParseSetup; +import water.util.DistributedException; import water.util.PojoUtils; import static water.parser.DefaultParserProviders.GUESS_INFO; @@ -33,9 +35,17 @@ public ParseSetupV3 guessSetup(int version, ParseSetupV3 p) { if (p.na_strings != null) for(int i = 0; i < p.na_strings.length; i++) if (p.na_strings[i] != null && p.na_strings[i].length == 0) p.na_strings[i] = null; - - ParseSetup ps = ParseSetup.guessSetup(fkeys, new ParseSetup(p)); - + ParseSetup ps; + try{ + ps = ParseSetup.guessSetup(fkeys, new ParseSetup(p)); + } catch(Throwable ex) { + Throwable ex2 = ex; + if(ex instanceof DistributedException) + ex2 = ex.getCause(); + if(ex2 instanceof ParseDataset.H2OParseException) + throw new H2OIllegalArgumentException(ex2.getMessage()); + throw ex; + } if(ps._errs != 
null && ps._errs.length > 0) { p.warnings = new String[ps._errs.length]; for (int i = 0; i < ps._errs.length; ++i) diff --git a/h2o-core/src/main/java/water/fvec/FileVec.java b/h2o-core/src/main/java/water/fvec/FileVec.java index 1878ff475638..99d0a30f932b 100644 --- a/h2o-core/src/main/java/water/fvec/FileVec.java +++ b/h2o-core/src/main/java/water/fvec/FileVec.java @@ -9,6 +9,18 @@ public abstract class FileVec extends ByteVec { long _len; // File length final byte _be; + // Returns String with path for given key. + public static String getPathForKey(Key k) { + final int off = k._kb[0]==Key.CHK || k._kb[0]==Key.VEC ? Vec.KEY_PREFIX_LEN : 0; + String p = new String(k._kb,off,k._kb.length-off); + + if(p.startsWith("nfs:/")) + p = p.substring("nfs:/".length()); + else if (p.startsWith("nfs:\\")) + p = p.substring("nfs:\\".length()); + + return p; + } /** Log-2 of Chunk size. */ public static final int DFLT_LOG2_CHUNK_SIZE = 20/*1Meg*/+2/*4Meg*/; /** Default Chunk size in bytes, useful when breaking up large arrays into @@ -16,13 +28,17 @@ public abstract class FileVec extends ByteVec { * costs, lower increases fine-grained parallelism. */ public static final int DFLT_CHUNK_SIZE = 1 << DFLT_LOG2_CHUNK_SIZE; public int _chunkSize = DFLT_CHUNK_SIZE; + public int _nChunks = -1; protected FileVec(Key key, long len, byte be) { super(key,-1/*no rowLayout*/); _len = len; _be = be; } - + public void setNChunks(int n){ + _nChunks = n; + setChunkSize((int)length()/n); + } /** * Chunk size must be positive, 1G or less, and a power of two. * Any values that aren't a power of two will be reduced to the @@ -36,6 +52,7 @@ protected FileVec(Key key, long len, byte be) { * @return actual _chunkSize setting */ public int setChunkSize(int chunkSize) { return setChunkSize(null, chunkSize); } + public int setChunkSize(Frame fr, int chunkSize) { // Clear cached chunks first // Peeking into a file before the chunkSize has been set @@ -63,7 +80,11 @@ public int setChunkSize(Frame fr, int chunkSize) { } @Override public long length() { return _len; } + + @Override public int nChunks() { + if(_nChunks != -1) // number of chunks can be set explicitly + return _nChunks; return (int)Math.max(1,_len / _chunkSize + ((_len % _chunkSize != 0)?1:0)); } @Override public boolean writable() { return false; } diff --git a/h2o-core/src/main/java/water/parser/DefaultParserProviders.java b/h2o-core/src/main/java/water/parser/DefaultParserProviders.java index 808567c7883c..7d0803abb4fb 100644 --- a/h2o-core/src/main/java/water/parser/DefaultParserProviders.java +++ b/h2o-core/src/main/java/water/parser/DefaultParserProviders.java @@ -6,6 +6,7 @@ import water.Job; import water.Key; +import water.fvec.ByteVec; import water.util.Log; /** @@ -23,7 +24,7 @@ public final class DefaultParserProviders { public static final ParserInfo SVMLight_INFO = new ParserInfo("SVMLight", 1000, true); public static final ParserInfo CSV_INFO = new ParserInfo("CSV", Integer.MAX_VALUE, true); public static final ParserInfo GUESS_INFO = new ParserInfo("GUESS", -10000, false); - /** Priority of non-core parsers shoudl begin here.*/ + /** Priority of non-core parsers should begin here.*/ public static final int MAX_CORE_PRIO = 10000; public final static class ArffParserProvider extends AbstractParserProvide { @@ -39,7 +40,7 @@ public Parser createParser(ParseSetup setup, Key jobKey) { } @Override - public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuotes, + public ParseSetup guessSetup(ByteVec bv, byte[] bits, byte sep, int ncols, 
boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings) { return ARFFParser.guessSetup(bits, sep, singleQuotes, columnNames, naStrings); @@ -59,7 +60,7 @@ public Parser createParser(ParseSetup setup, Key jobKey) { } @Override - public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuotes, + public ParseSetup guessSetup(ByteVec bv, byte[] bits, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings) { return XlsParser.guessSetup(bits); @@ -79,7 +80,7 @@ public Parser createParser(ParseSetup setup, Key jobKey) { } @Override - public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuotes, + public ParseSetup guessSetup(ByteVec bv, byte[] bits, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings) { return SVMLightParser.guessSetup(bits); @@ -99,7 +100,7 @@ public Parser createParser(ParseSetup setup, Key jobKey) { } @Override - public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuotes, + public ParseSetup guessSetup(ByteVec bv, byte[] bits, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings) { return CsvParser.guessSetup(bits, sep, ncols, singleQuotes, checkHeader, columnNames, columnTypes, naStrings); @@ -119,7 +120,7 @@ public Parser createParser(ParseSetup setup, Key jobKey) { } @Override - public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuotes, + public ParseSetup guessSetup(ByteVec bv, byte[] bits, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings) { List pps = ParserService.INSTANCE.getAllProviders(true); // Sort them based on priorities @@ -129,7 +130,7 @@ public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuo if (pp == this || pp.info().equals(GUESS_INFO)) continue; // Else try to guess with given provider try { - ParseSetup ps = pp.guessSetup(bits, sep, ncols, singleQuotes, checkHeader, columnNames, columnTypes, domains, naStrings); + ParseSetup ps = pp.guessSetup(bv, bits, sep, ncols, singleQuotes, checkHeader, columnNames, columnTypes, domains, naStrings); if( ps != null) { return ps; } @@ -142,7 +143,7 @@ public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuo } } - static abstract class AbstractParserProvide implements ParserProvider { + static abstract class AbstractParserProvide extends ParserProvider { @Override public ParseSetup createParserSetup(Key[] inputs, ParseSetup requiredSetup) { diff --git a/h2o-core/src/main/java/water/parser/FVecParseWriter.java b/h2o-core/src/main/java/water/parser/FVecParseWriter.java index 60bd57aa7e48..3713934da12d 100644 --- a/h2o-core/src/main/java/water/parser/FVecParseWriter.java +++ b/h2o-core/src/main/java/water/parser/FVecParseWriter.java @@ -169,7 +169,10 @@ else if(_errs.length < 20) _errCnt++; } - @Override public void setIsAllASCII(int colIdx, boolean b) {_nvs[colIdx]._isAllASCII = b;} + @Override public void setIsAllASCII(int colIdx, boolean b) { + if(colIdx < _nvs.length) + _nvs[colIdx]._isAllASCII = b; + } @Override public boolean hasErrors() { diff --git a/h2o-core/src/main/java/water/parser/ORCParser.java b/h2o-core/src/main/java/water/parser/ORCParser.java 
deleted file mode 100644 index 952ec530f987..000000000000 --- a/h2o-core/src/main/java/water/parser/ORCParser.java +++ /dev/null @@ -1,7 +0,0 @@ -package water.parser; - -/** - * Created by brandon on 9/22/15. - */ -public class ORCParser { -} diff --git a/h2o-core/src/main/java/water/parser/ParseDataset.java b/h2o-core/src/main/java/water/parser/ParseDataset.java index ea3f1dccb6a8..d16675399689 100644 --- a/h2o-core/src/main/java/water/parser/ParseDataset.java +++ b/h2o-core/src/main/java/water/parser/ParseDataset.java @@ -139,17 +139,22 @@ public static ParseDataset forkParseDataset(final Key dest, final Key[] k } Log.info("Total file size: "+ PrettyPrint.bytes(totalParseSize)); - // set the parse chunk size for files - for( int i = 0; i < keys.length; ++i ) { - Iced ice = DKV.getGet(keys[i]); - if(ice instanceof FileVec) { - ((FileVec) ice).setChunkSize(setup._chunk_size); - Log.info("Parse chunk size " + setup._chunk_size); - } else if(ice instanceof Frame && ((Frame)ice).vec(0) instanceof FileVec) { - ((FileVec) ((Frame) ice).vec(0)).setChunkSize((Frame) ice, setup._chunk_size); - Log.info("Parse chunk size " + setup._chunk_size); + + // no need to set this for ORC, it is already done: + if (!setup.getParseType().name().contains("ORC")) { + for( int i = 0; i < keys.length; ++i ) { + Iced ice = DKV.getGet(keys[i]); + + // set the parse chunk size for files + if (ice instanceof FileVec) { + ((FileVec) ice).setChunkSize(setup._chunk_size); + Log.info("Parse chunk size " + setup._chunk_size); + } else if (ice instanceof Frame && ((Frame) ice).vec(0) instanceof FileVec) { + ((FileVec) ((Frame) ice).vec(0)).setChunkSize((Frame) ice, setup._chunk_size); + Log.info("Parse chunk size " + setup._chunk_size); + } } - } + } else Log.info("Orc Parse chunk sizes may be different across files"); long memsz = H2O.CLOUD.free_mem(); if( totalParseSize > memsz*4 ) @@ -909,7 +914,7 @@ private FVecParseWriter streamParse(final InputStream is, final ParseSetup local // ------------------------------------------------------------------------ private static class DistributedParse extends MRTask { - private final ParseSetup _setup; + private ParseSetup _setup; private final int _vecIdStart; private final int _startChunkIdx; // for multifile parse, offset of the first chunk in the final dataset private final VectorGroup _vg; @@ -938,9 +943,10 @@ private static class DistributedParse extends MRTask { super.setupLocal(); _visited = new NonBlockingSetInt(); _espc = MemoryManager.malloc8(_nchunks); + _setup = ParserService.INSTANCE.getByInfo(_setup._parse_type).setupLocal(_fr.anyVec(),_setup); } @Override public void map( Chunk in ) { - if( _jobKey.get().stop_requested() ) return; + if( _jobKey.get().stop_requested() ) throw new Job.JobCancelledException(); AppendableVec [] avs = new AppendableVec[_setup._number_columns]; for(int i = 0; i < avs.length; ++i) if (_setup._column_types == null) // SVMLight @@ -956,22 +962,24 @@ private static class DistributedParse extends MRTask { case "ARFF": case "CSV": Categorical [] categoricals = categoricals(_cKey, _setup._number_columns); - dout = new FVecParseWriter(_vg,_startChunkIdx + in.cidx(), categoricals, _setup._column_types, _setup._chunk_size, avs); //TODO: use _setup._domains instead of categoricals + dout = new FVecParseWriter(_vg,_startChunkIdx + in.cidx(), categoricals, _setup._column_types, + _setup._chunk_size, avs); //TODO: use _setup._domains instead of categoricals break; case "SVMLight": dout = new SVMLightFVecParseWriter(_vg, _vecIdStart, in.cidx() + 
_startChunkIdx, _setup._chunk_size, avs); break; + case "ORC": // setup special case for ORC + Categorical [] orc_categoricals = categoricals(_cKey, _setup._number_columns); + dout = new FVecParseWriter(_vg, in.cidx() + _startChunkIdx, orc_categoricals, _setup._column_types, + _setup._chunk_size, avs); + break; default: // FIXME: should not be default and creation strategy should be forwarded to ParserProvider - dout = new FVecParseWriter(_vg, in.cidx() + _startChunkIdx, null, _setup._column_types, _setup._chunk_size, avs); + dout = new FVecParseWriter(_vg, in.cidx() + _startChunkIdx, null, _setup._column_types, + _setup._chunk_size, avs); break; } p.parseChunk(in.cidx(), din, dout); (_dout = dout).close(_fs); - if(_dout.hasErrors()) - for(ParseWriter.ParseErr err:_dout._errs) { - assert err != null : "Parse error cannot be null!"; - err._file = _srckey.toString(); - } Job.update(in._len, _jobKey); // Record bytes parsed // remove parsed data right away freeMem(in); @@ -1007,6 +1015,7 @@ private void freeMem(Chunk in) { _outerMFPT._dout[_outerMFPT._lo] = _dout; if(_dout.hasErrors()) { ParseWriter.ParseErr [] errs = _dout.removeErrors(); + for(ParseWriter.ParseErr err:errs)err._file = FileVec.getPathForKey(_srckey).toString(); Arrays.sort(errs, new Comparator() { @Override public int compare(ParseWriter.ParseErr o1, ParseWriter.ParseErr o2) { diff --git a/h2o-core/src/main/java/water/parser/ParseSetup.java b/h2o-core/src/main/java/water/parser/ParseSetup.java index 08ca8d4c604b..594c5b961c68 100644 --- a/h2o-core/src/main/java/water/parser/ParseSetup.java +++ b/h2o-core/src/main/java/water/parser/ParseSetup.java @@ -5,6 +5,7 @@ import water.exceptions.H2OIllegalArgumentException; import water.fvec.*; import water.util.ArrayUtils; +import water.util.FileUtils; import water.util.Log; import java.io.BufferedReader; @@ -25,7 +26,7 @@ public class ParseSetup extends Iced { public static final int HAS_HEADER = 1; public static final int GUESS_COL_CNT = -1; - ParserInfo _parse_type; // CSV, XLS, XSLX, SVMLight, Auto, ARFF + ParserInfo _parse_type; // CSV, XLS, XSLX, SVMLight, Auto, ARFF, ORC byte _separator; // Field separator, usually comma ',' or TAB or space ' ' // Whether or not single-quotes quote a field. E.g. how do we parse: // raw data: 123,'Mally,456,O'Mally @@ -40,6 +41,10 @@ public class ParseSetup extends Iced { String[][] _na_strings; // Strings for NA in a given column String[][] _data; // First few rows of parsed/tokenized data + String [] _fileNames = new String[]{"unknown"}; + + public void setFileName(String name) {_fileNames[0] = name;} + public ParseWriter.ParseErr[] _errs; public int _chunk_size = FileVec.DFLT_CHUNK_SIZE; // Optimal chunk size to be used store values PreviewParseWriter _column_previews = null; @@ -51,11 +56,14 @@ public ParseSetup(ParseSetup ps) { new ParseWriter.ParseErr[0], ps._chunk_size); } + public static ParseSetup makeSVMLightSetup(){ return new ParseSetup(SVMLight_INFO, ParseSetup.GUESS_SEP, false,ParseSetup.NO_HEADER,1,null,new byte[]{Vec.T_NUM},null,null,null, new ParseWriter.ParseErr[0]); } + // This method was called during guess setup, lot of things are null, like ctypes. 
+ // when it is called again, it either contains the guess column types or it will have user defined column types public ParseSetup(ParserInfo parse_type, byte sep, boolean singleQuotes, int checkHeader, int ncols, String[] columnNames, byte[] ctypes, String[][] domains, String[][] naStrings, String[][] data, ParseWriter.ParseErr[] errs, int chunkSize) { _parse_type = parse_type; _separator = sep; @@ -339,7 +347,7 @@ public GuessSetupTsk(ParseSetup userSetup) { || bv.length() <= FileVec.DFLT_CHUNK_SIZE || decompRatio > 1.0) { */ try { - _gblSetup = guessSetup(bits, _userSetup); + _gblSetup = guessSetup(bv, bits, _userSetup); for(ParseWriter.ParseErr e:_gblSetup._errs) { e._byteOffset += e._cidx*Parser.StreamData.bufSz; e._cidx = 0; @@ -389,6 +397,7 @@ public GuessSetupTsk(ParseSetup userSetup) { "Remaining files have been ignored."; }*/ } + _gblSetup.setFileName(FileUtils.keyToFileName(key)); } /** @@ -430,17 +439,17 @@ private ParseSetup mergeSetups(ParseSetup setupA, ParseSetup setupB, String file mergedSetup._check_header = unifyCheckHeader(setupA._check_header, setupB._check_header); mergedSetup._separator = unifyColumnSeparators(setupA._separator, setupB._separator); - mergedSetup._column_names = unifyColumnNames(setupA._column_names, setupB._column_names); if (setupA._parse_type.equals(ARFF_INFO) && setupB._parse_type.equals(CSV_INFO)) ;// do nothing parse_type and col_types are already set correctly else if (setupA._parse_type.equals(CSV_INFO) && setupB._parse_type.equals(ARFF_INFO)) { mergedSetup._parse_type = ARFF_INFO; mergedSetup._column_types = setupB._column_types; - } else if (setupA._parse_type.equals(setupB._parse_type)) { + } else if (setupA.isCompatible(setupB)) { mergedSetup._column_previews = PreviewParseWriter.unifyColumnPreviews(setupA._column_previews, setupB._column_previews); } else - throw new ParseDataset.H2OParseException("File type mismatch. Cannot parse files of type " - + setupA._parse_type + " and " + setupB._parse_type + " as one dataset."); + throw new ParseDataset.H2OParseException("File type mismatch. Cannot parse files " + setupA.file() + " and " + setupB.file() + " of type " + + setupA._parse_type.name() + " and " + setupB._parse_type.name() + " as one dataset."); + mergedSetup._column_names = unifyColumnNames(setupA._column_names, setupB._column_names); mergedSetup._number_columns = mergedSetup._parse_type.equals(CSV_INFO) ? Math.max(setupA._number_columns,setupB._number_columns):unifyColumnCount(setupA._number_columns, setupB._number_columns,mergedSetup, fileA, fileB); if (mergedSetup._data.length < PreviewParseWriter.MAX_PREVIEW_LINES) { int n = mergedSetup._data.length; @@ -449,6 +458,7 @@ else if (setupA._parse_type.equals(CSV_INFO) && setupB._parse_type.equals(ARFF_I System.arraycopy(setupB._data, 1, mergedSetup._data, n, m - n); } mergedSetup._errs = ArrayUtils.append(setupA._errs,setupB._errs); + mergedSetup._fileNames = ArrayUtils.append(setupA._fileNames,setupB._fileNames); if(mergedSetup._errs.length > 20) mergedSetup._errs = Arrays.copyOf(mergedSetup._errs,20); return mergedSetup; @@ -499,6 +509,18 @@ private static String[] unifyColumnNames(String[] namesA, String[] namesB){ } } + + private String file() { + String [] names = _fileNames; + if(names.length > 5) + names = Arrays.copyOf(names,5); + return Arrays.toString(names); + } + + protected boolean isCompatible(ParseSetup setupB) { + return _parse_type.equals(setupB._parse_type) && _number_columns == setupB._number_columns; + } + /** * Guess everything from a single pile-o-bits. 
Used in tests, or in initial * parser inspections when the user has not told us anything about separators @@ -507,14 +529,14 @@ private static String[] unifyColumnNames(String[] namesA, String[] namesB){ * @param bits Initial bytes from a parse source * @return ParseSetup settings from looking at all files */ - public static ParseSetup guessSetup( byte[] bits, ParseSetup userSetup ) { - return guessSetup(bits, userSetup._parse_type, userSetup._separator, GUESS_COL_CNT, userSetup._single_quotes, userSetup._check_header, userSetup._column_names, userSetup._column_types, null, null); + public static ParseSetup guessSetup( ByteVec bv, byte [] bits, ParseSetup userSetup ) { + return guessSetup(bv, bits, userSetup._parse_type, userSetup._separator, GUESS_COL_CNT, userSetup._single_quotes, userSetup._check_header, userSetup._column_names, userSetup._column_types, null, null); } - public static ParseSetup guessSetup(byte[] bits, ParserInfo parserType, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings ) { + public static ParseSetup guessSetup(ByteVec bv, byte [] bits, ParserInfo parserType, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings ) { ParserProvider pp = ParserService.INSTANCE.getByInfo(parserType); if (pp != null) { - return pp.guessSetup(bits, sep, ncols, singleQuotes, checkHeader, columnNames, columnTypes, domains, naStrings); + return pp.guessSetup(bv, bits, sep, ncols, singleQuotes, checkHeader, columnNames, columnTypes, domains, naStrings); } throw new ParseDataset.H2OParseException("Cannot determine file type."); } @@ -541,6 +563,7 @@ public static String createHexName(String n) { || n.endsWith("xls") || n.endsWith("txt") || n.endsWith("svm") + || n.endsWith("orc") || n.endsWith("arff"))) { n = n.substring(0, dot); dot = n.lastIndexOf('.'); diff --git a/h2o-core/src/main/java/water/parser/ParserProvider.java b/h2o-core/src/main/java/water/parser/ParserProvider.java index 584d76a7334c..01523f198642 100644 --- a/h2o-core/src/main/java/water/parser/ParserProvider.java +++ b/h2o-core/src/main/java/water/parser/ParserProvider.java @@ -1,23 +1,24 @@ package water.parser; -import water.Freezable; import water.Job; import water.Key; +import water.fvec.ByteVec; +import water.fvec.Vec; /** * Generic Parser provider. */ -public interface ParserProvider { +public abstract class ParserProvider { /** Technical information for this parser */ - ParserInfo info(); + public abstract ParserInfo info(); /** Create a new parser */ - Parser createParser(ParseSetup setup, Key jobKey); + public abstract Parser createParser(ParseSetup setup, Key jobKey); /** Returns parser setup of throws exception if input is not recognized */ // FIXME: should be more flexible - ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings ); + public abstract ParseSetup guessSetup(ByteVec v, byte[] bits, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings ); /** Create a parser specific setup. 
* @@ -26,5 +27,14 @@ public interface ParserProvider { * @param requiredSetup user given parser setup * @return parser specific setup */ - ParseSetup createParserSetup(Key[] inputs, ParseSetup requiredSetup); + public abstract ParseSetup createParserSetup(Key[] inputs, ParseSetup requiredSetup); + + /** + * Executed exactly once per-file-per-node during parse. + * Do any file-related non-distributed setup here. E.g. ORC reader creates node-shared instance of a (non-serializable) Reader. + * @param v + * @param setup + */ + + public ParseSetup setupLocal(Vec v, ParseSetup setup){ return setup;} } diff --git a/h2o-core/src/main/java/water/rapids/ast/prims/mungers/AstFlatten.java b/h2o-core/src/main/java/water/rapids/ast/prims/mungers/AstFlatten.java index 4eb1033c0160..a62bb494cbf7 100644 --- a/h2o-core/src/main/java/water/rapids/ast/prims/mungers/AstFlatten.java +++ b/h2o-core/src/main/java/water/rapids/ast/prims/mungers/AstFlatten.java @@ -39,11 +39,11 @@ public Val apply(Env env, Env.StackHelp stk, AstRoot asts[]) { case Vec.T_NUM: return new ValNum(vec.at(0)); case Vec.T_TIME: - return new ValNum(vec.at8(0)); + return vec.isNA(0)?new ValNum(Double.NaN) : new ValNum(vec.at8(0)); // check for missing values case Vec.T_STR: return new ValStr(vec.atStr(new BufferedString(), 0).toString()); - case Vec.T_CAT: - return new ValStr(vec.factor(vec.at8(0))); + case Vec.T_CAT: // check for missing values + return vec.isNA(0)?new ValStr("NA") : new ValStr(vec.factor(vec.at8(0))); default: throw H2O.unimpl("The type of vector: " + vec.get_type_str() + " is not supported by " + str()); } diff --git a/h2o-core/src/main/java/water/util/Log.java b/h2o-core/src/main/java/water/util/Log.java index 51e8f07703f9..438930a04ffa 100644 --- a/h2o-core/src/main/java/water/util/Log.java +++ b/h2o-core/src/main/java/water/util/Log.java @@ -1,16 +1,16 @@ package water.util; -import java.io.File; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; - import org.apache.log4j.H2OPropertyConfigurator; import org.apache.log4j.LogManager; import org.apache.log4j.PropertyConfigurator; import water.H2O; import water.persist.PersistManager; +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; + /** Log for H2O. * * OOME: when the VM is low on memory, OutOfMemoryError can be thrown in the @@ -302,6 +302,9 @@ private static void setLog4jProperties(String logDir, java.util.Properties p) th p.setProperty("log4j.logger.org.reflections.Reflections", "ERROR"); p.setProperty("log4j.logger.com.brsanthu.googleanalytics", "ERROR"); + // Turn down the logging for external libraries that Orc parser depends on + p.setProperty("log4j.logger.org.apache.hadoop.util.NativeCodeLoader", "ERROR"); + // See the following document for information about the pattern layout. 
// http://logging.apache.org/log4j/1.2/apidocs/org/apache/log4j/PatternLayout.html // diff --git a/h2o-core/src/test/java/water/TestUtil.java b/h2o-core/src/test/java/water/TestUtil.java index 692974c0d17d..05f69aeaa1d2 100644 --- a/h2o-core/src/test/java/water/TestUtil.java +++ b/h2o-core/src/test/java/water/TestUtil.java @@ -9,6 +9,7 @@ import org.junit.runners.model.Statement; import water.fvec.*; import water.parser.BufferedString; +import water.parser.DefaultParserProviders; import water.parser.ParseDataset; import water.parser.ParseSetup; import water.util.Log; @@ -215,6 +216,7 @@ protected static Frame parse_test_file( Key outputKey, String fname) { NFSFileVec nfs = NFSFileVec.make(f); return ParseDataset.parse(outputKey, nfs._key); } + protected Frame parse_test_file( Key outputKey, String fname , boolean guessSetup) { File f = find_test_file(fname); assert f != null && f.exists():" file not found: " + fname; @@ -222,6 +224,38 @@ protected Frame parse_test_file( Key outputKey, String fname , boolean guessSetu return ParseDataset.parse(outputKey, new Key[]{nfs._key}, true, ParseSetup.guessSetup(new Key[]{nfs._key},false,1)); } + protected Frame parse_test_file( String fname, String na_string, int check_header, byte[] column_types ) { + File f = find_test_file_static(fname); + assert f != null && f.exists():" file not found: " + fname; + NFSFileVec nfs = NFSFileVec.make(f); + + Key[] res = {nfs._key}; + + // create new parseSetup in order to store our na_string + ParseSetup p = ParseSetup.guessSetup(res, new ParseSetup(DefaultParserProviders.GUESS_INFO,(byte) ',',true, + check_header,0,null,null,null,null,null)); + + // add the na_strings into p. + if (na_string != null) { + int column_number = p.getColumnTypes().length; + int na_length = na_string.length() - 1; + + String[][] na_strings = new String[column_number][na_length + 1]; + + for (int index = 0; index < column_number; index++) { + na_strings[index][na_length] = na_string; + } + + p.setNAStrings(na_strings); + } + + if (column_types != null) + p.setColumnTypes(column_types); + + return ParseDataset.parse(Key.make(), res, true, p); + + } + /** Find & parse a folder of CSV files. NPE if file not found. * @param fname Test filename * @return Frame or NPE */ @@ -239,6 +273,53 @@ protected Frame parse_test_folder( String fname ) { return ParseDataset.parse(Key.make(), res); } + + /** + * Parse a folder with csv files when a single na_string is specified. + * + * @param fname + * @param na_string + * @return + */ + protected Frame parse_test_folder( String fname, String na_string, int check_header, byte[] column_types ) { + File folder = find_test_file(fname); + assert folder.isDirectory(); + File[] files = folder.listFiles(); + Arrays.sort(files); + ArrayList keys = new ArrayList<>(); + for( File f : files ) + if( f.isFile() ) + keys.add(NFSFileVec.make(f)._key); + + Key[] res = new Key[keys.size()]; + keys.toArray(res); // generated the necessary key here + + // create new parseSetup in order to store our na_string + ParseSetup p = ParseSetup.guessSetup(res, new ParseSetup(DefaultParserProviders.GUESS_INFO,(byte) ',',true, + check_header,0,null,null,null,null,null)); + + // add the na_strings into p. 
+ if (na_string != null) { + int column_number = p.getColumnTypes().length; + int na_length = na_string.length() - 1; + + String[][] na_strings = new String[column_number][na_length + 1]; + + for (int index = 0; index < column_number; index++) { + na_strings[index][na_length] = na_string; + } + + p.setNAStrings(na_strings); + } + + if (column_types != null) + p.setColumnTypes(column_types); + + return ParseDataset.parse(Key.make(), res, true, p); + + } + + /** A Numeric Vec from an array of ints * @param rows Data * @return The Vec */ diff --git a/h2o-core/src/test/java/water/parser/ParserTest2.java b/h2o-core/src/test/java/water/parser/ParserTest2.java index 397faa59b2e9..b841ce9921c0 100644 --- a/h2o-core/src/test/java/water/parser/ParserTest2.java +++ b/h2o-core/src/test/java/water/parser/ParserTest2.java @@ -1,14 +1,18 @@ package water.parser; -import org.junit.*; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; +import water.Key; +import water.TestUtil; +import water.fvec.Frame; +import water.fvec.Vec; +import water.util.PrettyPrint; import java.util.Random; import java.util.UUID; -import water.*; -import water.fvec.*; -import water.util.PrettyPrint; - import static water.parser.DefaultParserProviders.CSV_INFO; public class ParserTest2 extends TestUtil { @@ -76,7 +80,7 @@ private static void testParsed(Frame fr, String[][] expected) { ar("'Tomas''s","test2'","test2",null), ar("last","'line''s","trailing","piece'") }; Key k = ParserTest.makeByteVec(data); - ParseSetup gSetupF = ParseSetup.guessSetup(data[0].getBytes(), CSV_INFO, (byte)',', 4, false/*single quote*/, ParseSetup.NO_HEADER, null, null, null, null); + ParseSetup gSetupF = ParseSetup.guessSetup(null, data[0].getBytes(), CSV_INFO, (byte)',', 4, false/*single quote*/, ParseSetup.NO_HEADER, null, null, null, null); gSetupF._column_types = ParseSetup.strToColumnTypes(new String[]{"Enum", "Enum", "Enum", "Enum"}); Frame frF = ParseDataset.parse(Key.make(), new Key[]{k}, false, gSetupF); testParsed(frF,expectFalse); @@ -84,7 +88,7 @@ private static void testParsed(Frame fr, String[][] expected) { String[][] expectTrue = new String[][] { ar("Tomass,test,first,line", null), ar("Tomas''stest2","test2"), ar("last", "lines trailing piece") }; - ParseSetup gSetupT = ParseSetup.guessSetup(data[0].getBytes(), CSV_INFO, (byte)',', 2, true/*single quote*/, ParseSetup.NO_HEADER, null, null, null, null); + ParseSetup gSetupT = ParseSetup.guessSetup(null, data[0].getBytes(), CSV_INFO, (byte)',', 2, true/*single quote*/, ParseSetup.NO_HEADER, null, null, null, null); gSetupT._column_types = ParseSetup.strToColumnTypes(new String[]{"Enum", "Enum", "Enum", "Enum"}); Frame frT = ParseDataset.parse(Key.make(), new Key[]{k}, true, gSetupT); //testParsed(frT,expectTrue); // not currently passing diff --git a/h2o-core/src/test/java/water/rapids/GroupingBench.java b/h2o-core/src/test/java/water/rapids/GroupingBench.java index 09ca2dd8923a..f126ca156fba 100644 --- a/h2o-core/src/test/java/water/rapids/GroupingBench.java +++ b/h2o-core/src/test/java/water/rapids/GroupingBench.java @@ -254,7 +254,7 @@ public class GroupingBench extends TestUtil { @Test public void runBench2() { Frame f1=null, f2=null, fx=null; - try { + try { // build a hi count cardinality frame final long card = (long)1e4; f1 = buildFrame(card,-1); diff --git a/h2o-hadoop/assemblyjar.gradle b/h2o-hadoop/assemblyjar.gradle index bfb7058655b6..4b90f0ba5f61 100644 --- a/h2o-hadoop/assemblyjar.gradle +++ 
b/h2o-hadoop/assemblyjar.gradle @@ -25,6 +25,17 @@ dependencies { if (project.hasProperty("maprExtraDependency")) { compile(project.property("maprExtraDependency")) } + if (orcSupported) { + compile(project(":h2o-orc-parser")) { + // We do not get any dependencies but directly rely on provided environment + transitive = false + } + + // Here we depends on hive-exec, but it is Hadoop version specific + compile("org.apache.hive:hive-exec:$orcHiveExecVersion") { + transitive = false + } + } } @@ -46,6 +57,7 @@ shadowJar { manifest { attributes 'Main-Class': 'water.hadoop.h2odriver' } + zip64 true } artifacts { diff --git a/h2o-hadoop/h2o-cdh5.2-assembly/build.gradle b/h2o-hadoop/h2o-cdh5.2-assembly/build.gradle index f423ca0be309..1525220449ed 100644 --- a/h2o-hadoop/h2o-cdh5.2-assembly/build.gradle +++ b/h2o-hadoop/h2o-cdh5.2-assembly/build.gradle @@ -1,6 +1,8 @@ ext { hadoopVersion = 'cdh5.2' hadoopMavenArtifactVersion = '2.5.0-cdh5.2.0' + orcSupported = true + orcHiveExecVersion = '0.13.1-cdh5.2.0' } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-cdh5.2/build.gradle b/h2o-hadoop/h2o-cdh5.2/build.gradle index f11842011daa..7c7e5a497b8d 100644 --- a/h2o-hadoop/h2o-cdh5.2/build.gradle +++ b/h2o-hadoop/h2o-cdh5.2/build.gradle @@ -1,6 +1,7 @@ ext { hadoopVersion = 'cdh5.2' hadoopMavenArtifactVersion = '2.5.0-cdh5.2.0' + orcSupported = false } apply from: '../driverjar.gradle' diff --git a/h2o-hadoop/h2o-cdh5.3-assembly/build.gradle b/h2o-hadoop/h2o-cdh5.3-assembly/build.gradle index 2cb863125a04..d257a648aa77 100644 --- a/h2o-hadoop/h2o-cdh5.3-assembly/build.gradle +++ b/h2o-hadoop/h2o-cdh5.3-assembly/build.gradle @@ -1,6 +1,8 @@ ext { hadoopVersion = 'cdh5.3' hadoopMavenArtifactVersion = '2.5.0-cdh5.3.0' + orcSupported = true + orcHiveExecVersion = '0.13.1-cdh5.3.0' } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-cdh5.4.2-assembly/build.gradle b/h2o-hadoop/h2o-cdh5.4.2-assembly/build.gradle index 2cb863125a04..56e97f6cbcec 100644 --- a/h2o-hadoop/h2o-cdh5.4.2-assembly/build.gradle +++ b/h2o-hadoop/h2o-cdh5.4.2-assembly/build.gradle @@ -1,6 +1,8 @@ ext { - hadoopVersion = 'cdh5.3' - hadoopMavenArtifactVersion = '2.5.0-cdh5.3.0' + hadoopVersion = 'cdh5.4.2' + hadoopMavenArtifactVersion = '2.5.0-cdh5.4.2' + orcSupported = true + orcHiveExecVersion = "1.1.0-$hadoopVersion" } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-cdh5.5.3-assembly/build.gradle b/h2o-hadoop/h2o-cdh5.5.3-assembly/build.gradle index 175f6d6201ce..69c380972960 100644 --- a/h2o-hadoop/h2o-cdh5.5.3-assembly/build.gradle +++ b/h2o-hadoop/h2o-cdh5.5.3-assembly/build.gradle @@ -1,6 +1,8 @@ ext { hadoopVersion = 'cdh5.5.3' - hadoopMavenArtifactVersion = '2.6.0-cdh5.5.2' + hadoopMavenArtifactVersion = '2.6.0-cdh5.5.4' + orcSupported = true + orcHiveExecVersion = "1.1.0-cdh5.5.4" } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-cdh5.6.0-assembly/build.gradle b/h2o-hadoop/h2o-cdh5.6.0-assembly/build.gradle index 9ab45de6b73e..6ae418505119 100644 --- a/h2o-hadoop/h2o-cdh5.6.0-assembly/build.gradle +++ b/h2o-hadoop/h2o-cdh5.6.0-assembly/build.gradle @@ -1,6 +1,8 @@ ext { hadoopVersion = 'cdh5.6.0' hadoopMavenArtifactVersion = '2.6.0-cdh5.6.0' + orcSupported = true + orcHiveExecVersion = "1.1.0-$hadoopVersion" } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-cdh5.7.0-assembly/build.gradle b/h2o-hadoop/h2o-cdh5.7.0-assembly/build.gradle index e415be8d18f9..e10aca76f2f8 100644 --- a/h2o-hadoop/h2o-cdh5.7.0-assembly/build.gradle +++ 
b/h2o-hadoop/h2o-cdh5.7.0-assembly/build.gradle @@ -1,6 +1,8 @@ ext { hadoopVersion = 'cdh5.7.0' hadoopMavenArtifactVersion = '2.6.0-cdh5.7.0' + orcSupported = true + orcHiveExecVersion = "1.1.0-$hadoopVersion" } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-hdp2.1-assembly/build.gradle b/h2o-hadoop/h2o-hdp2.1-assembly/build.gradle index db145e059a2f..a50f2cc92f88 100644 --- a/h2o-hadoop/h2o-hdp2.1-assembly/build.gradle +++ b/h2o-hadoop/h2o-hdp2.1-assembly/build.gradle @@ -1,6 +1,9 @@ ext { hadoopVersion = 'hdp2.1' hadoopMavenArtifactVersion = '2.4.0.2.1.1.0-385' + orcSupported = true + orcHiveExecVersion = "0.13.0" + } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-hdp2.2-assembly/build.gradle b/h2o-hadoop/h2o-hdp2.2-assembly/build.gradle index 768a0d4890e4..9a35a615a70d 100644 --- a/h2o-hadoop/h2o-hdp2.2-assembly/build.gradle +++ b/h2o-hadoop/h2o-hdp2.2-assembly/build.gradle @@ -1,6 +1,8 @@ ext { hadoopVersion = 'hdp2.2' hadoopMavenArtifactVersion = '2.6.0.2.2.0.0-2041' + orcSupported = true + orcHiveExecVersion = "0.14.0" } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-hdp2.3-assembly/build.gradle b/h2o-hadoop/h2o-hdp2.3-assembly/build.gradle index ee950735a84c..4745a633a6e1 100644 --- a/h2o-hadoop/h2o-hdp2.3-assembly/build.gradle +++ b/h2o-hadoop/h2o-hdp2.3-assembly/build.gradle @@ -1,6 +1,9 @@ ext { hadoopVersion = 'hdp2.3' hadoopMavenArtifactVersion = '2.7.1.2.3.2.0-2950' + orcSupported = true + orcHiveExecVersion = "1.2.1" + } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-hdp2.4-assembly/build.gradle b/h2o-hadoop/h2o-hdp2.4-assembly/build.gradle index b6980e300c4f..df351650f75a 100644 --- a/h2o-hadoop/h2o-hdp2.4-assembly/build.gradle +++ b/h2o-hadoop/h2o-hdp2.4-assembly/build.gradle @@ -1,6 +1,8 @@ ext { hadoopVersion = 'hdp2.4' hadoopMavenArtifactVersion = '2.7.1.2.4.0.0-169' + orcSupported = true + orcHiveExecVersion = "1.2.1" } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-mapr3.1.1-assembly/build.gradle b/h2o-hadoop/h2o-mapr3.1.1-assembly/build.gradle index b6c992030ce7..6bebf304757f 100644 --- a/h2o-hadoop/h2o-mapr3.1.1-assembly/build.gradle +++ b/h2o-hadoop/h2o-mapr3.1.1-assembly/build.gradle @@ -3,6 +3,7 @@ ext { hadoopVersion = 'mapr3.1.1' hadoopMavenArtifactVersion = '1.0.3-mapr-3.1.1' maprExtraDependency = 'org.json:org.json:chargebee-1.0' + orcSupported = false } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-mapr4.0.1-assembly/build.gradle b/h2o-hadoop/h2o-mapr4.0.1-assembly/build.gradle index 932ab3273d84..312e67cf6301 100644 --- a/h2o-hadoop/h2o-mapr4.0.1-assembly/build.gradle +++ b/h2o-hadoop/h2o-mapr4.0.1-assembly/build.gradle @@ -2,6 +2,7 @@ ext { hadoopVersion = 'mapr4.0.1' hadoopMavenArtifactVersion = '2.4.1-mapr-1408' maprExtraDependency = 'org.json:org.json:chargebee-1.0' + orcSupported = false } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-mapr5.0-assembly/build.gradle b/h2o-hadoop/h2o-mapr5.0-assembly/build.gradle index a64577c5d49d..2400cf85eecc 100644 --- a/h2o-hadoop/h2o-mapr5.0-assembly/build.gradle +++ b/h2o-hadoop/h2o-mapr5.0-assembly/build.gradle @@ -2,6 +2,7 @@ ext { hadoopVersion = 'mapr5.0' hadoopMavenArtifactVersion = '2.7.0-mapr-1506' maprExtraDependency = 'org.json:org.json:chargebee-1.0' + orcSupported = false } apply from: '../assemblyjar.gradle' diff --git a/h2o-hadoop/h2o-mapr5.1-assembly/build.gradle b/h2o-hadoop/h2o-mapr5.1-assembly/build.gradle index f1be24728c07..2229c9d0c788 100644 --- 
a/h2o-hadoop/h2o-mapr5.1-assembly/build.gradle +++ b/h2o-hadoop/h2o-mapr5.1-assembly/build.gradle @@ -2,6 +2,7 @@ ext { hadoopVersion = 'mapr5.1' hadoopMavenArtifactVersion = '2.7.0-mapr-1506' maprExtraDependency = 'org.json:org.json:chargebee-1.0' + orcSupported = false } apply from: '../assemblyjar.gradle' diff --git a/h2o-parsers/h2o-avro-parser/src/main/java/water/parser/avro/AvroParserProvider.java b/h2o-parsers/h2o-avro-parser/src/main/java/water/parser/avro/AvroParserProvider.java index 4966d81db5d2..2cb1edc95a69 100644 --- a/h2o-parsers/h2o-avro-parser/src/main/java/water/parser/avro/AvroParserProvider.java +++ b/h2o-parsers/h2o-avro-parser/src/main/java/water/parser/avro/AvroParserProvider.java @@ -16,7 +16,7 @@ /** * Avro parser provider. */ -public class AvroParserProvider implements ParserProvider { +public class AvroParserProvider extends ParserProvider { /* Setup for this parser */ static ParserInfo AVRO_INFO = new ParserInfo("AVRO", DefaultParserProviders.MAX_CORE_PRIO + 10, true, true); @@ -32,7 +32,7 @@ public Parser createParser(ParseSetup setup, Key jobKey) { } @Override - public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuotes, + public ParseSetup guessSetup(ByteVec bv, byte[] bits, byte sep, int ncols, boolean singleQuotes, int checkHeader, String[] columnNames, byte[] columnTypes, String[][] domains, String[][] naStrings) { return AvroParser.guessSetup(bits); diff --git a/h2o-parsers/h2o-orc-parser/build.gradle b/h2o-parsers/h2o-orc-parser/build.gradle new file mode 100644 index 000000000000..550186147979 --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/build.gradle @@ -0,0 +1,38 @@ +// +// H2O Orc Parser +// +description = "H2O Orc Parser" + +dependencies { + compile project(":h2o-core") + // Only PersistHDFS API + compile(project(":h2o-persist-hdfs")) { + transitive = false + } + + // Note: What is connection between hive-exec version and hadoop-version and orc version? + // Note: In this case we are using hive version which is compatible with $orcDefaultHadoopClientVersion + // Note: for newest version it should be replaces by hive-orc + compile("org.apache.hive:hive-exec:$orcDefaultHiveExecVersion") { + transitive = false + } + // For compilation we need common + compile("org.apache.hadoop:hadoop-common:$orcDefaultHadoopClientVersion") { + transitive = false + } + + testCompile "junit:junit:${junitVersion}" + testCompile project(path: ":h2o-core", configuration: "testArchives") + // We need correct version of MapRe Hadoop to run JUnits + testCompile("org.apache.hadoop:hadoop-client:$orcDefaultHadoopClientVersion") +} + +apply from: "${rootDir}/gradle/dataCheck.gradle" + +test { + dependsOn ":h2o-core:testJar" + dependsOn smalldataCheck, cpLibs, jar, testJar, testMultiNode + + // Defeat task 'test' by running no tests. 
+ exclude '**' +} diff --git a/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcParser.java b/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcParser.java new file mode 100644 index 000000000000..6a579591cff6 --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcParser.java @@ -0,0 +1,614 @@ +package water.parser.orc; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.io.orc.Reader; +import org.apache.hadoop.hive.ql.io.orc.RecordReader; +import org.apache.hadoop.hive.ql.io.orc.StripeInformation; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.joda.time.DateTime; +import org.joda.time.MutableDateTime; +import water.H2O; +import water.Job; +import water.Key; +import water.fvec.Vec; +import water.parser.*; +import water.util.ArrayUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import static water.parser.orc.OrcUtil.isSupportedSchema; +import static water.parser.orc.OrcUtil.schemaToColumnType; + +// Orc support + +/** + * ORC parser for H2O distributed parsing subsystem. + * + * Basically, here is the plan: + * To parse an Orc file, we need to do the following in order to get the following useful + * information: + * 1. Get a Reader rdr. + * 2. From the reader rdr, we can get the following pieces of information: + * a. number of columns, column types and column names. We only support parsing of primitive types; + * b. Lists of StripeInformation that describes how many stripes of data that we will need to read; + * c. For each stripe, get information like rows per stripe, data size in bytes + * 3. The plan is to read the file in parallel in whole numbers of stripes. + * 4. Inside each stripe, we will read data out in batches of VectorizedRowBatch (1024 rows or less). + * + */ +public class OrcParser extends Parser { + + /** Orc Info */ + private final Reader orcFileReader; // can generate all the other fields from this reader + public static final int DAY_TO_MS = 24*3600*1000; + public static final int ADD_OFFSET = 8*3600*1000; + public static final int HOUR_OFFSET = 3600000; // in ms to offset for leap seconds, years + private MutableDateTime epoch = new MutableDateTime(); // used to help us out the leap seconds, years + private ArrayList storeWarnings = new ArrayList(); // store a list of warnings + + + OrcParser(ParseSetup setup, Key jobKey) { + super(setup, jobKey); + + epoch.setDate(0); // used to figure out leap seconds, years + + this.orcFileReader = ((OrcParser.OrcParseSetup) setup).orcFileReader; + } + + private transient int _cidx; + + private transient HashMap> _toStringMaps = new HashMap<>(); + /** + * This method calculates the number of stripes that will be read for each chunk. Since + * only single threading is supported in reading each stripe, we will never split one stripe + * over different chunks. + * + * @param chunkId: chunk index, calculated as file size/chunk size. The file size is calculated + * with data plus overhead in terms of headers and other info, number of chunks + * calculated will be higher than the actual chunks needed. If the chunk number + * is too high, the method will return without writing to + * dout. + * @param din: ParseReader, not used for parsing orc files + * @param dout: ParseWriter, used to add data to H2O frame. 
+ * @return: Parsewriter dout. + */ + @Override + protected final ParseWriter parseChunk(int chunkId, ParseReader din, ParseWriter dout) { + _cidx = chunkId; + // only do something if within file size and the orc file is not empty + List stripesInfo = ((OrcParseSetup) this._setup).getStripes(); + if(stripesInfo.size() == 0) { + dout.addError(new ParseWriter.ParseErr("Orc Parser: Empty file.", chunkId, 0L, -2L)); + return dout; // empty file + } + OrcParseSetup setup = (OrcParseSetup) this._setup; + StripeInformation thisStripe = stripesInfo.get(chunkId); // get one stripe + // write one stripe of data to H2O frame + String [] orcTypes = setup.getColumnTypesString(); + boolean[] toInclude = setup.getToInclude(); + try { + RecordReader perStripe = orcFileReader.rows(thisStripe.getOffset(), thisStripe.getDataLength(), + setup.getToInclude(), null, setup.getColumnNames()); + VectorizedRowBatch batch = null; + long rows = 0; + long rowCount = thisStripe.getNumberOfRows(); + while (rows != rowCount) { + batch = perStripe.nextBatch(batch); // read orc file stripes in vectorizedRowBatch + long currentBatchRow = batch.count(); + int nrows = (int)currentBatchRow; + if(currentBatchRow != nrows) + throw new IllegalArgumentException("got batch with too many records, does not fit in int"); + ColumnVector[] dataVectors = batch.cols; + int colIndex = 0; + for (int col = 0; col < batch.numCols; ++col) { // read one column at a time; + if (toInclude[col + 1]) { // only write a column if we actually want it + write1column(dataVectors[col], orcTypes[colIndex], colIndex, nrows, dout); + colIndex++; + } + } + rows += currentBatchRow; // record number of rows of data actually read + } + perStripe.close(); + } catch(IOException ioe) { + throw new RuntimeException(ioe); + } + return dout; + } + + + /** + * This method writes one column of H2O data frame at a time. + * + * @param oneColumn + * @param columnType + * @param cIdx + * @param rowNumber + * @param dout + */ + private void write1column(ColumnVector oneColumn, String columnType, int cIdx, int rowNumber,ParseWriter dout) { + if(oneColumn.isRepeating && !oneColumn.noNulls) { // ALL NAs + for(int i = 0; i < rowNumber; ++i) + dout.addInvalidCol(cIdx); + } else switch (columnType.toLowerCase()) { + case "bigint": + case "boolean": + case "int": + case "smallint": + case "tinyint": + writeLongcolumn((LongColumnVector)oneColumn, cIdx, rowNumber, dout); + break; + case "float": + case "double": + writeDoublecolumn((DoubleColumnVector)oneColumn, cIdx, rowNumber, dout); + break; + case "numeric": + case "real": + if (oneColumn instanceof LongColumnVector) + writeLongcolumn((LongColumnVector)oneColumn, cIdx, rowNumber, dout); + else + writeDoublecolumn((DoubleColumnVector)oneColumn, cIdx, rowNumber, dout); + break; + case "string": + case "varchar": + case "char": +// case "binary": //FIXME: only reading it as string right now. + writeStringcolumn((BytesColumnVector)oneColumn, cIdx, rowNumber, dout); + break; + case "date": + case "timestamp": + writeTimecolumn((LongColumnVector)oneColumn, columnType, cIdx, rowNumber, dout); + break; + case "decimal": + writeDecimalcolumn((DecimalColumnVector)oneColumn, cIdx, rowNumber, dout); + break; + default: + throw new IllegalArgumentException("Unsupported Orc schema type: " + columnType); + } + } + + /** + * This method is written to take care of the leap seconds, leap year effects. 
Our original + * plan of converting number of days from epoch does not quite work out right due to all these + * leap seconds, years accumulated over the century. However, I do notice that when we are + * not correcting for the leap seconds/years, if we build a dateTime object, the hour does not + * work out to be 00. Instead it is off. In this case, we just calculate the offset and take + * if off our straight forward timestamp calculation. + * + * @param daysSinceEpoch: number of days since epoch (1970 1/1) + * @return long: correct timestamp corresponding to daysSinceEpoch + */ + private long correctTimeStamp(long daysSinceEpoch) { + long timestamp = (daysSinceEpoch*DAY_TO_MS+ADD_OFFSET); + DateTime date = new DateTime(timestamp); + int hour = date.hourOfDay().get(); + if (hour == 0) + return timestamp; + else + return (timestamp-hour*HOUR_OFFSET); + } + + /** + * This method writes one column of H2O frame for column type timestamp. This is just a long that + * records the number of seconds since Jan 1, 2015. + * + * @param col + * @param cIdx + * @param rowNumber + * @param dout + */ + private void writeTimecolumn(LongColumnVector col, String columnType,int cIdx, + int rowNumber, ParseWriter dout) { + boolean timestamp = columnType.equals("timestamp"); + long [] oneColumn = col.vector; + if(col.isRepeating) { + long val = timestamp ? oneColumn[0] / 1000000 : correctTimeStamp(oneColumn[0]); + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) + dout.addNumCol(cIdx, val, 0); + } else if(col.noNulls) { + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) + dout.addNumCol(cIdx, timestamp ? oneColumn[rowIndex] / 1000000 : correctTimeStamp(oneColumn[rowIndex]), 0); + } else { + boolean[] isNull = col.isNull; + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) { + if (isNull[rowIndex]) + dout.addInvalidCol(cIdx); + else + dout.addNumCol(cIdx, timestamp ? oneColumn[rowIndex] / 1000000 : correctTimeStamp(oneColumn[rowIndex]), 0); + } + } + } + + /** + * This method writes a column to H2O frame for column type Decimal. It is just written as some + * integer without using the scale field. Need to make sure this is what the customer wants. + * + * @param col + * @param cIdx + * @param rowNumber + * @param dout + */ + private void writeDecimalcolumn(DecimalColumnVector col, int cIdx, + int rowNumber, ParseWriter dout) { + HiveDecimalWritable[] oneColumn = col.vector; + if(col.isRepeating) { + HiveDecimal hd = oneColumn[0].getHiveDecimal(); + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) + dout.addNumCol(cIdx, hd.unscaledValue().longValue(),-hd.scale()); + } else if(col.noNulls) { + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) { + HiveDecimal hd = oneColumn[rowIndex].getHiveDecimal(); + dout.addNumCol(cIdx, hd.unscaledValue().longValue(),-hd.scale()); + } + } else { + boolean [] isNull = col.isNull; + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) { + if (isNull[rowIndex]) + dout.addInvalidCol(cIdx); + else { + HiveDecimal hd = oneColumn[rowIndex].getHiveDecimal(); + dout.addNumCol(cIdx, hd.unscaledValue().longValue(), -hd.scale()); + } + } + } + } + + /** + * This method writes a column of H2O frame for Orc File column types of string, varchar, char and + * binary at some point. 
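For the decimal path above, the value handed to the writer is a mantissa/exponent pair rather than a double; a small, hypothetical illustration of the decomposition used by writeDecimalcolumn:

    // 123.45 is held by Hive as unscaled value 12345 with scale 2 and is forwarded as
    // addNumCol(col, 12345, -2), i.e. 12345 * 10^-2. The literal is hypothetical.
    org.apache.hadoop.hive.common.type.HiveDecimal hd =
        org.apache.hadoop.hive.common.type.HiveDecimal.create("123.45");
    long mantissa = hd.unscaledValue().longValue();   // 12345
    int exponent = -hd.scale();                       // -2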
+ * + * @param col + * @param cIdx + * @param rowNumber + * @param dout + */ + private void writeStringcolumn(BytesColumnVector col, int cIdx, int rowNumber, ParseWriter dout) { + BufferedString bs = new BufferedString(); + if(col.isRepeating) { + dout.addStrCol(cIdx, bs.set(col.vector[0], col.start[0], col.length[0])); + for (int rowIndex = 1; rowIndex < rowNumber; ++rowIndex) + dout.addStrCol(cIdx, bs); + } else if(col.noNulls){ + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) + dout.addStrCol(cIdx, bs.set(col.vector[rowIndex], col.start[rowIndex], col.length[rowIndex])); + } else { + boolean [] isNull = col.isNull; + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) { + if (isNull[rowIndex]) + dout.addInvalidCol(cIdx); + else + dout.addStrCol(cIdx, bs.set(col.vector[rowIndex], col.start[rowIndex], col.length[rowIndex])); + } + } + } + + + /** + * This method writes a column of H2O frame for Orc File column type of float or double. + * + * @param vec + * @param colId + * @param rowNumber + * @param dout + */ + private void writeDoublecolumn(DoubleColumnVector vec, int colId, int rowNumber, ParseWriter dout) { + double[] oneColumn = vec.vector; + byte t = _setup.getColumnTypes()[colId]; + switch(t) { + case Vec.T_CAT: + if(_toStringMaps.get(colId) == null) + _toStringMaps.put(colId,new HashMap()); + HashMap map = _toStringMaps.get(colId); + BufferedString bs = new BufferedString(); + if(vec.isRepeating) { + bs.set(Double.toString(oneColumn[0]).getBytes()); + for (int i = 0; i < rowNumber; ++i) + dout.addStrCol(colId, bs); + } else if (vec.noNulls) { + for (int i = 0; i < rowNumber; i++) { + double d = oneColumn[i]; + if(map.get(d) == null) // TODO probably more effficient if moved to the data output + map.put(d, Double.toString(d).getBytes()); + dout.addStrCol(colId, bs.set(map.get(d))); + } + } else { + for (int i = 0; i < rowNumber; i++) { + boolean [] isNull = vec.isNull; + if (isNull[i]) + dout.addInvalidCol(colId); + else { + double d = oneColumn[i]; + if(map.get(d) == null) + map.put(d,Double.toString(d).getBytes()); + dout.addStrCol(colId, bs.set(map.get(d))); + } + } + } + break; + default: + if(vec.isRepeating) { + for (int i = 0; i < rowNumber; ++i) + dout.addNumCol(colId, oneColumn[0]); + } else if (vec.noNulls) { + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) + dout.addNumCol(colId, oneColumn[rowIndex]); + } else { + boolean [] isNull = vec.isNull; + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) { + if (isNull[rowIndex]) dout.addInvalidCol(colId); + else dout.addNumCol(colId, oneColumn[rowIndex]); + } + } + break; + } + } + + /** + * This method writes a column of H2O frame for Orc File column type of boolean, bigint, int, smallint, + * tinyint and date. 
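When a numeric ORC column is forced to categorical (Vec.T_CAT), writeDoublecolumn above and writeLongcolumn below cache the textual form of each distinct value so the bytes are built only once per level. The pattern in isolation, with hypothetical values (in the parser the map lives in _toStringMaps, keyed by column index):

    // Per-column memoization of number-to-bytes conversion for categorical output.
    java.util.HashMap<Long, byte[]> levels = new java.util.HashMap<>();
    long v = 42L;                                     // one cell of a LongColumnVector
    byte[] text = levels.get(v);
    if (text == null) {                               // first time this level is seen
      text = Long.toString(v).getBytes();
      levels.put(v, text);
    }
    // the bytes are then handed to the writer via addStrCol(colId, bufferedString.set(text))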
+ * + * @param vec + * @param colId + * @param rowNumber + * @param dout + */ + private void writeLongcolumn(LongColumnVector vec, int colId, int rowNumber, ParseWriter dout) { + long[] oneColumn = vec.vector; + byte t = _setup.getColumnTypes()[colId]; + switch(t) { + case Vec.T_CAT: + if(_toStringMaps.get(colId) == null) + _toStringMaps.put(colId,new HashMap()); + HashMap map = _toStringMaps.get(colId); + BufferedString bs = new BufferedString(); + if(vec.isRepeating) { + bs.set(Long.toString(oneColumn[0]).getBytes()); + for (int i = 0; i < rowNumber; ++i) + dout.addStrCol(colId, bs); + } else if (vec.noNulls) { + for (int i = 0; i < rowNumber; i++) { + long l = oneColumn[i]; + if(map.get(l) == null) + map.put(l,Long.toString(l).getBytes()); + dout.addStrCol(colId, bs.set(map.get(l))); + } + } else { + for (int i = 0; i < rowNumber; i++) { + boolean [] isNull = vec.isNull; + if (isNull[i]) + dout.addInvalidCol(colId); + else { + long l = oneColumn[i]; + if(map.get(l) == null) + map.put(l,Long.toString(l).getBytes()); + dout.addStrCol(colId, bs.set(map.get(l))); + } + } + } + break; + default: + if(vec.isRepeating) { + for (int i = 0; i < rowNumber; ++i) + dout.addNumCol(colId, oneColumn[0], 0); + } else if (vec.noNulls) { + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) { + check_Min_Value(oneColumn[rowIndex], colId, rowNumber, dout); + dout.addNumCol(colId, oneColumn[rowIndex], 0); + } + } else { + for (int rowIndex = 0; rowIndex < rowNumber; rowIndex++) { + boolean [] isNull = vec.isNull; + if (isNull[rowIndex]) + dout.addInvalidCol(colId); + else { + check_Min_Value(oneColumn[rowIndex], colId, rowNumber, dout); + dout.addNumCol(colId, oneColumn[rowIndex], 0); + } + } + } + break; + } + } + + /** + * This method is written to check and make sure any value written to a column of type long + * is more than Long.MIN_VALUE. If this is not true, a warning will be passed to the user. + * + * @param l + * @param cIdx + * @param rowNumber + * @param dout + */ + private void check_Min_Value(long l, int cIdx, int rowNumber, ParseWriter dout) { + if (l <= Long.MIN_VALUE) { + String warning = "Orc Parser: Long.MIN_VALUE: " + l + " is found in column "+cIdx+" row "+rowNumber + + " of stripe "+_cidx +". 
This value is used for sentinel and will not be parsed correctly."; + dout.addError(new ParseWriter.ParseErr(warning, _cidx, rowNumber, -2L)); + } + } + + public static class OrcParseSetup extends ParseSetup { + // expand to include Orc specific fields + transient Reader orcFileReader; + String[] columnTypesString; + boolean[] toInclude; + String[] allColumnNames; + + public OrcParseSetup(int ncols, + String[] columnNames, + byte[] ctypes, + String[][] domains, + String[][] naStrings, + String[][] data, + Reader orcReader, + String[] columntypes, + boolean[] toInclude, + String[] allColNames, ParseWriter.ParseErr[] errs) { + super(OrcParserProvider.ORC_INFO, (byte) '|', true, HAS_HEADER , + ncols, columnNames, ctypes, domains, naStrings, data, errs); + this.orcFileReader = orcReader; + this.columnTypesString = columntypes; + this.toInclude = toInclude; + this.allColumnNames = allColNames; + } + + @Override + protected boolean isCompatible(ParseSetup setupB) { + return super.isCompatible(setupB) && Arrays.equals(getColumnTypes(),setupB.getColumnTypes()); + } + + @Override + protected Parser parser(Key jobKey) { + return new OrcParser(this, jobKey); + } + + public Reader getOrcFileReader() { + return this.orcFileReader; + } + + public String[] getColumnTypesString() { + return this.columnTypesString; + } + + public void setColumnTypeStrings(String[] columnTypeStrings) { + this.columnTypesString = columnTypeStrings; + } + + public boolean[] getToInclude() { return this.toInclude; } + public String[] getAllColNames() { return this.allColumnNames; } + public void setAllColNames(String[] columnNames) { + this.allColumnNames = allColumnNames; + } + + public void setOrcFileReader(Reader orcFileReader) { + this.orcFileReader = orcFileReader; + this.stripesInfo = orcFileReader.getStripes(); + } + private transient List stripesInfo; + public List getStripes() {return stripesInfo;} + } + + // types are flattened in pre-order tree walk, here we just count the number of fields for non-primitve types + // which are ignored for now + static private int countStructFields(ObjectInspector x, ArrayList allColumnNames) { + int res = 1; + switch(x.getCategory()) { + case STRUCT: + StructObjectInspector structObjectInspector = (StructObjectInspector) x; + List allColumns = (List) structObjectInspector.getAllStructFieldRefs(); // column info + for (StructField oneField : allColumns) { + allColumnNames.add(oneField.getFieldName()); + res += countStructFields(oneField.getFieldObjectInspector(),allColumnNames); + } + break; + case LIST: + ListObjectInspector listObjectInspector = (ListObjectInspector) x; + allColumnNames.add("list"); + res += countStructFields(listObjectInspector.getListElementObjectInspector(),allColumnNames); + break; + case MAP: + MapObjectInspector mapObjectInspector = (MapObjectInspector) x; + allColumnNames.add("mapKey"); + res += countStructFields(mapObjectInspector.getMapKeyObjectInspector(),allColumnNames); + allColumnNames.add("mapValue"); + res += countStructFields(mapObjectInspector.getMapValueObjectInspector(),allColumnNames); + break; + case UNION: + UnionObjectInspector unionObjectInspector = (UnionObjectInspector)x; + allColumnNames.add("union"); + for( ObjectInspector xx:unionObjectInspector.getObjectInspectors()) + res += countStructFields(xx,allColumnNames); + break; + case PRIMITIVE:break; + default: throw H2O.unimpl(); + } + return res; + } + /* + * This function will derive information like column names, types and number from + * the inspector. 
+ */ + static OrcParseSetup deriveParseSetup(Reader orcFileReader, StructObjectInspector insp) { + List allColumns = (List) insp.getAllStructFieldRefs(); // grab column info + List allStripes = orcFileReader.getStripes(); // grab stripe information + ArrayList allColNames = new ArrayList<>(); + boolean[] toInclude = new boolean[allColumns.size()+1]; + int supportedFieldCnt = 0 ; + int colIdx = 0; + for (StructField oneField:allColumns) { + allColNames.add(oneField.getFieldName()); + String columnType = oneField.getFieldObjectInspector().getTypeName(); + if (columnType.toLowerCase().contains("decimal")) { + columnType = "decimal"; + } + if (isSupportedSchema(columnType)) { + toInclude[colIdx+1] = true; + supportedFieldCnt++; + } + int cnt = countStructFields(oneField.getFieldObjectInspector(),allColNames); + if(cnt > 1) + toInclude = Arrays.copyOf(toInclude,toInclude.length + cnt-1); + colIdx+=cnt; + } + String [] allNames = allColNames.toArray(new String[allColNames.size()]); + String[] names = new String[supportedFieldCnt]; + + byte[] types = new byte[supportedFieldCnt]; + String[][] domains = new String[supportedFieldCnt][]; + String[] dataPreview = new String[supportedFieldCnt]; + String[] dataTypes = new String[supportedFieldCnt]; + ParseWriter.ParseErr[] errs = new ParseWriter.ParseErr[0]; + + // go through all column information + int columnIndex = 0; + for (StructField oneField : allColumns) { + String columnType = oneField.getFieldObjectInspector().getTypeName(); + if (columnType.toLowerCase().contains("decimal")) + columnType = "decimal"; // get rid of strange attachment + if (isSupportedSchema(columnType)) { + names[columnIndex] = oneField.getFieldName(); + types[columnIndex] = schemaToColumnType(columnType); + dataTypes[columnIndex] = columnType; + columnIndex++; + } else { + errs = ArrayUtils.append(errs, new ParseWriter.ParseErr("Orc Parser: Skipping field: " + + oneField.getFieldName() + " because of unsupported type: " + columnType, -1, -1L, -2L)); + } + } + + // get size of each stripe + long[] stripeSizes = new long[allStripes.size()]; + long fileSize = 0L; + long maxStripeSize = 0L; + + for (int index = 0; index < allStripes.size(); index++) { + long stripeSize = allStripes.get(index).getDataLength(); + + if (stripeSize > maxStripeSize) + maxStripeSize = stripeSize; + + fileSize = fileSize + stripeSize; + stripeSizes[index] = fileSize; + } + OrcParseSetup ps = new OrcParseSetup( + supportedFieldCnt, + names, + types, + domains, + null, + new String[][] { dataPreview }, + orcFileReader, + dataTypes, + toInclude, + allNames, + errs + ); + + return ps; + } +} \ No newline at end of file diff --git a/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcParserProvider.java b/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcParserProvider.java new file mode 100644 index 000000000000..6fcf921f72bf --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcParserProvider.java @@ -0,0 +1,138 @@ +package water.parser.orc; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.io.orc.Reader; +import org.apache.hadoop.hive.ql.io.orc.StripeInformation; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import water.DKV; +import water.H2O; +import water.Job; +import water.Key; +import water.fvec.*; +import water.parser.*; +import water.persist.PersistHdfs; + +import java.io.IOException; +import 
java.util.Arrays; +import java.util.List; + +import static water.fvec.FileVec.getPathForKey; + + +/** + * Orc parser provider. + */ +public class OrcParserProvider extends ParserProvider { + + /* Setup for this parser */ + static ParserInfo ORC_INFO = new ParserInfo("ORC", DefaultParserProviders.MAX_CORE_PRIO + 20, true); + + @Override + public ParserInfo info() { + return ORC_INFO; + } + + @Override + public Parser createParser(ParseSetup setup, Key jobKey) { + return new OrcParser(setup, jobKey); + } + + @Override + public ParseSetup guessSetup(ByteVec bv, byte [] bits, byte sep, int ncols, boolean singleQuotes, + int checkHeader, String[] columnNames, byte[] columnTypes, + String[][] domains, String[][] naStrings) { + if(bv instanceof FileVec) + return readSetup((FileVec)bv, columnNames, columnTypes); + throw new UnsupportedOperationException("ORC only works on Files"); + } + + /** + * Use only the first file to setup everything. + * + * @param inputs input keys + * @param requiredSetup user given parser setup + * @return + */ + @Override + public ParseSetup createParserSetup(Key[] inputs, ParseSetup requiredSetup) { + + FileVec f; + Object frameOrVec = DKV.getGet(inputs[0]); + + if (frameOrVec instanceof water.fvec.Frame) + f = (FileVec) ((Frame) frameOrVec).vec(0); + else + f = (FileVec) frameOrVec; + return readSetup(f, requiredSetup.getColumnNames(), requiredSetup.getColumnTypes()); + } + + private Reader getReader(FileVec f) throws IOException { + String strPath = getPathForKey(f._key); + Path path = new Path(strPath); + if(f instanceof HDFSFileVec) + return OrcFile.createReader(PersistHdfs.getFS(strPath), path); + else + return OrcFile.createReader(path, OrcFile.readerOptions(new Configuration())); + } + + /** + * This method will create the readers and others info needed to parse an orc file. + * In addition, it will not over-ride the columnNames, columnTypes that the user + * may want to force upon it. However, we only allow users to set column types to + * enum at this point and ignore all the other requests. 
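The provider above is registered through the standard META-INF/services entry for water.parser.ParserProvider added later in this patch, so it is presumably picked up via java.util.ServiceLoader. A sketch of what that discovery looks like from client code:

    // Sketch only: enumerate parser providers the way a ServiceLoader-backed registry would.
    java.util.ServiceLoader<water.parser.ParserProvider> loader =
        java.util.ServiceLoader.load(water.parser.ParserProvider.class);
    for (water.parser.ParserProvider p : loader)
      System.out.println(p.info());                   // the ORC provider reports its ParserInfo ("ORC")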
+ * + * @param f + * @param columnNames + * @param columnTypes + * @return + */ + public ParseSetup readSetup(FileVec f, String[] columnNames, byte[] columnTypes) { + try { + Reader orcFileReader = getReader(f); + StructObjectInspector insp = (StructObjectInspector) orcFileReader.getObjectInspector(); + OrcParser.OrcParseSetup stp = OrcParser.deriveParseSetup(orcFileReader, insp); + + // change back the columnNames and columnTypes if they are specified already + if (!(columnNames == null) && (stp.getAllColNames().length == columnNames.length)) { // copy column name + stp.setColumnNames(columnNames); + stp.setAllColNames(columnNames); + } + + if (!(columnTypes == null) && (columnTypes.length == stp.getColumnTypes().length)) { // copy enum type only + byte[] old_columnTypes = stp.getColumnTypes(); + String[] old_columnTypeNames = stp.getColumnTypesString(); + for (int index = 0; index < columnTypes.length; index++) { + if (columnTypes[index] == Vec.T_CAT) // only copy the enum types + old_columnTypes[index] = columnTypes[index]; + } + stp.setColumnTypes(old_columnTypes); + stp.setColumnTypeStrings(old_columnTypeNames); + } + + List stripesInfo = orcFileReader.getStripes(); + if(stripesInfo.size() == 0) { // empty file + f.setChunkSize(stp._chunk_size = (int)f.length()); + return stp; + } + f.setNChunks(stripesInfo.size()); + stp._chunk_size = f._chunkSize; + assert f.nChunks() == stripesInfo.size(); // ORC parser needs one-to one mapping between chunk and strip (just ids, offsets do not matter) + return stp; + } catch(IOException ioe) { + throw new RuntimeException(ioe); + } + } + + @Override + public ParseSetup setupLocal(Vec v, ParseSetup setup){ + if(!(v instanceof FileVec)) throw H2O.unimpl("ORC only implemented for HDFS / NFS files"); + try { + ((OrcParser.OrcParseSetup)setup).setOrcFileReader(getReader((FileVec)v)); + + return setup; + + } catch (IOException e) {throw new RuntimeException(e);} + } +} diff --git a/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcUtil.java b/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcUtil.java new file mode 100644 index 000000000000..4983eb775788 --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/src/main/java/water/parser/orc/OrcUtil.java @@ -0,0 +1,72 @@ +package water.parser.orc; + +import water.fvec.Vec; + +/** + * Utilities to work with Orc schema. + */ +public final class OrcUtil { + + /** Return true if the given schema can be transformed + * into h2o type. + * + * @param s orc field name in string + * @return true if the schema can be transformed into H2O type + */ + public static boolean isSupportedSchema(String s) { + + switch (s.toLowerCase()) { + case "boolean": + case "bigint": // long +// case "binary": // removed binary column type support for now + case "char": + case "date": + case "decimal": + case "double": + case "float": + case "int": + case "smallint": + case "string": + case "timestamp": + case "tinyint": + case "varchar": + case "enum": + return true; + default: + return false; + } + } + + /** + * Transform Orc column types into H2O type. + * + * @param s Orc data type + * @return a byte representing H2O column type + * @throws IllegalArgumentException if schema is not supported + */ + public static byte schemaToColumnType(String s) { + switch (s.toLowerCase()) { + case "boolean": + case "smallint": + case "tinyint": + case "bigint": // FIXME: make sure this is fixed by Tomas. 
+ case "int": + case "float": + case "double": + case "decimal": + return Vec.T_NUM; + case "timestamp": + case "date": + return Vec.T_TIME; + case "enum": + return Vec.T_CAT; + case "string": + case "varchar": +// case "binary": // Removed binary column type support for now + case "char": + return Vec.T_STR; + default: + throw new IllegalArgumentException("Unsupported Orc schema type: " + s); + } + } +} \ No newline at end of file diff --git a/h2o-parsers/h2o-orc-parser/src/main/resources/META-INF/services/water.parser.ParserProvider b/h2o-parsers/h2o-orc-parser/src/main/resources/META-INF/services/water.parser.ParserProvider new file mode 100644 index 000000000000..f48f8fd351a5 --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/src/main/resources/META-INF/services/water.parser.ParserProvider @@ -0,0 +1 @@ +water.parser.orc.OrcParserProvider diff --git a/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestMultiFileOrc.java b/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestMultiFileOrc.java new file mode 100644 index 000000000000..22c89d841116 --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestMultiFileOrc.java @@ -0,0 +1,55 @@ +package water.parser; + + +import org.junit.BeforeClass; +import org.junit.Test; +import water.TestUtil; +import water.fvec.Frame; + +import static org.junit.Assert.assertTrue; + +/** + * Test suite for orc parser. + * + * This test will attempt to perform multi-file parsing of a csv and orc file and compare + * the frame summary statistics to make sure they are equivalent. + * + * -- Requested by Tomas N. + * + */ +public class ParseTestMultiFileOrc extends TestUtil { + + private double EPSILON = 1e-9; + private long ERRORMARGIN = 1000L; // error margin when compare timestamp. + int totalFilesTested = 0; + int numberWrong = 0; + + private String[] csvDirectories = {"bigdata/laptop/parser/orc/pubdev_3200/air05_csv", + "bigdata/laptop/parser/orc/milsongs_orc_csv", "smalldata/synthetic_perfect_separation"}; + private String[] orcDirectories = {"bigdata/laptop/parser/orc/pubdev_3200/air05_orc", + "bigdata/laptop/parser/orc/milsongs_orc", "smalldata/parser/orc/synthetic_perfect_separation"}; + + @BeforeClass + static public void setup() { TestUtil.stall_till_cloudsize(5); } + + @Test + public void testParseMultiFileOrcs() { + + for (int f_index = 0; f_index < csvDirectories.length; f_index++) { + Frame csv_frame = parse_test_folder(csvDirectories[f_index], "\\N", 0, null); + + byte[] types = csv_frame.types(); + + for (int index = 0; index < types.length; index++) { + if (types[index] == 0) + types[index] = 4; + } + + Frame orc_frame = parse_test_folder(orcDirectories[f_index], null, 0, types); + assertTrue(TestUtil.isIdenticalUpToRelTolerance(csv_frame, orc_frame, 1e-5)); + + csv_frame.delete(); + orc_frame.delete(); + } + } +} \ No newline at end of file diff --git a/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestORCCSV.java b/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestORCCSV.java new file mode 100644 index 000000000000..89c4f9e0a62d --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestORCCSV.java @@ -0,0 +1,78 @@ +package water.parser; + + +import org.junit.BeforeClass; +import org.junit.Test; +import water.TestUtil; +import water.fvec.Frame; + +import static org.junit.Assert.assertTrue; + +/** + * Test suite for orc parser. + * + * This test will attempt to parse a bunch of files (orc and csv). 
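Going back to the OrcUtil helpers introduced above, the type mapping can be exercised directly; a few hypothetical checks that follow from the switch statements:

    // Sketch only: expected behavior of the OrcUtil schema mapping.
    assert water.parser.orc.OrcUtil.isSupportedSchema("varchar");
    assert !water.parser.orc.OrcUtil.isSupportedSchema("map<string,int>");   // complex types are skipped
    assert water.parser.orc.OrcUtil.schemaToColumnType("timestamp") == water.fvec.Vec.T_TIME;
    assert water.parser.orc.OrcUtil.schemaToColumnType("decimal") == water.fvec.Vec.T_NUM;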
We compare the frames of these files and make + * sure that they are equivalent. + * + * -- Requested by Tomas N. + * + */ +public class ParseTestORCCSV extends TestUtil { + + private double EPSILON = 1e-9; + private long ERRORMARGIN = 1000L; // error margin when compare timestamp. + int totalFilesTested = 0; + int numberWrong = 0; + + private String[] csvFiles = {"smalldata/parser/orc/orc2csv/TestOrcFile.testDate1900.csv", + "smalldata/parser/orc/orc2csv/TestOrcFile.testDate2038.csv", + "smalldata/parser/orc/orc2csv/orc_split_elim.csv", "smalldata/parser/csv2orc/prostate_NA.csv", + "smalldata/iris/iris.csv", "smalldata/jira/hexdev_29.csv"}; + + private String[] orcFiles = {"smalldata/parser/orc/TestOrcFile.testDate1900.orc", + "smalldata/parser/orc/TestOrcFile.testDate2038.orc", "smalldata/parser/orc/orc_split_elim.orc", + "smalldata/parser/orc/prostate_NA.orc", "smalldata/parser/orc/iris.orc", + "smalldata/parser/orc/hexdev_29.orc"}; + + private Boolean[] forceColumnTypes = {false, false, false, true, true, true}; + + @BeforeClass + static public void setup() { TestUtil.stall_till_cloudsize(5); } + + @Test + public void testParseOrcCsvFiles() { + int f_index = 0; + Frame csv_frame = parse_test_file(csvFiles[f_index], "\\N", 0, null); + Frame orc_frame = null; + + if (forceColumnTypes[f_index]) { + byte[] types = csv_frame.types(); + + for (int index = 0; index < types.length; index++) { + if (types[index] == 0) + types[index] = 3; + } + + orc_frame = parse_test_file(orcFiles[f_index], null, 0, types); + } else { + orc_frame = parse_test_file(orcFiles[f_index], null, 0, null); + } + + + // make sure column types are the same especially the enums + byte[] csv_types = csv_frame.types(); + byte[] orc_types = orc_frame.types(); + + for (int index = 0; index < csv_frame.numCols(); index++) { + if ((csv_types[index] == 4) && (orc_types[index] == 2)) { + orc_frame.replace(index, orc_frame.vec(index).toCategoricalVec().toNumericVec()); + csv_frame.replace(index, csv_frame.vec(index).toNumericVec()); + } + } + + assertTrue(TestUtil.isIdenticalUpToRelTolerance(csv_frame, orc_frame, 1e-5)); + + csv_frame.delete(); + orc_frame.delete(); + } +} \ No newline at end of file diff --git a/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestOrc.java b/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestOrc.java new file mode 100644 index 000000000000..d0e66af2cdb5 --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/src/test/java/water/parser/ParseTestOrc.java @@ -0,0 +1,434 @@ +package water.parser; + + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.io.orc.Reader; +import org.apache.hadoop.hive.ql.io.orc.RecordReader; +import org.apache.hadoop.hive.ql.io.orc.StripeInformation; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.joda.time.DateTime; +import org.junit.BeforeClass; +import org.junit.Test; +import water.TestUtil; +import water.fvec.Frame; +import water.fvec.Vec; +import water.util.Log; + +import java.util.*; + +import java.io.File; +import java.io.IOException; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static water.parser.orc.OrcUtil.isSupportedSchema; + +/** + * Test suite for orc 
parser. + * + * This test will build a H2O frame for all orc files found in smalldata/parser/orc directory + * and compare the H2O frame content with the orc file content read with Core Java commands. + * Test is declared a success if the content of H2O frame is the same as the contents read + * by using core Java commands off the Orc file itself. No multi-threading is used in reading + * off the Orc file using core Java commands. + */ +public class ParseTestOrc extends TestUtil { + + private double EPSILON = 1e-9; + private long ERRORMARGIN = 1000L; // error margin when compare timestamp. + int totalFilesTested = 0; + int numberWrong = 0; + BufferedString h2o = new BufferedString(); + BufferedString tempOrc = new BufferedString(); + public static final int DAY_TO_MS = 24*3600*1000; + public static final int ADD_OFFSET = 8*3600*1000; + public static final int HOUR_OFFSET = 3600000; // in ms to offset for leap seconds, years + + // list all orc files in smalldata/parser/orc directory + private String[] allOrcFiles = { + "smalldata/parser/orc/TestOrcFile.columnProjection.orc", + "smalldata/parser/orc/bigint_single_col.orc", + "smalldata/parser/orc/TestOrcFile.emptyFile.orc", + "smalldata/parser/orc/bool_single_col.orc", +// "smalldata/parser/orc/TestOrcFile.metaData.orc", +// "smalldata/parser/orc/decimal.orc", +// "smalldata/parser/orc/TestOrcFile.test1.orc", + "smalldata/parser/orc/demo-11-zlib.orc", + "smalldata/parser/orc/TestOrcFile.testDate1900.orc", + "smalldata/parser/orc/demo-12-zlib.orc", + "smalldata/parser/orc/TestOrcFile.testDate2038.orc", + "smalldata/parser/orc/double_single_col.orc", + "smalldata/parser/orc/TestOrcFile.testMemoryManagementV11.orc", + "smalldata/parser/orc/float_single_col.orc", + "smalldata/parser/orc/TestOrcFile.testMemoryManagementV12.orc", + "smalldata/parser/orc/int_single_col.orc", + "smalldata/parser/orc/TestOrcFile.testPredicatePushdown.orc", + "smalldata/parser/orc/nulls-at-end-snappy.orc", +// "smalldata/parser/orc/TestOrcFile.testSeek.orc", +// "smalldata/parser/orc/orc-file-11-format.orc", + "smalldata/parser/orc/TestOrcFile.testSnappy.orc", + "smalldata/parser/orc/orc_split_elim.orc", + "smalldata/parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc", +// "smalldata/parser/orc/over1k_bloom.orc", + "smalldata/parser/orc/TestOrcFile.testStripeLevelStats.orc", + "smalldata/parser/orc/smallint_single_col.orc", +// "smalldata/parser/orc/TestOrcFile.testTimestamp.orc", + "smalldata/parser/orc/string_single_col.orc", +// "smalldata/parser/orc/TestOrcFile.testUnionAndTimestamp.orc", + "smalldata/parser/orc/tinyint_single_col.orc", + "smalldata/parser/orc/TestOrcFile.testWithoutIndex.orc", +// "smalldata/parser/orc/version1999.orc" + }; + + @BeforeClass + static public void setup() { TestUtil.stall_till_cloudsize(5); } + + @Test + public void testParseAllOrcs() { + Set failedFiles = new TreeSet<>(); + int numOfOrcFiles = allOrcFiles.length; // number of Orc Files to test + + for (int fIndex = 0; fIndex < numOfOrcFiles; fIndex++) + { + +// if ((fIndex == 4) || (fIndex == 6) || (fIndex == 18) || (fIndex == 23) || (fIndex == 28)) +// continue; // do not support metadata from user +// +// if (fIndex == 31) // contain only orc header, no column and no row, total file size is 0. 
+// continue; +// +// if (fIndex == 19) // different column names are used between stripes +// continue; +// +// if (fIndex == 26) // abnormal orc file, no inpsector structure available +// continue; + +// if (fIndex ==30) // problem getting the right column number and then comparison problem +// continue; + +// if (fIndex == 22) // problem with BufferedString retrieval for binary, wait for Tomas +// continue; +// +// if (fIndex == 17) // problem with bigint retrieval, wait for Tomas +// continue; + +// Random rn = new Random(); +// int randNum = rn.nextInt(10); +// +// if (randNum > 3) // skip test for 70% of the time +// continue; + + String fileName = allOrcFiles[fIndex]; + Log.info("Orc Parser parsing " + fileName); + File f = find_test_file_static(fileName); + + if (f != null && f.exists()) { + Configuration conf = new Configuration(); + Path p = new Path(f.toString()); + try { + Reader orcFileReader = OrcFile.createReader(p, OrcFile.readerOptions(conf)); // orc reader + Frame h2oFrame = parse_test_file(fileName); // read one orc file and build a H2O frame + + compareH2OFrame(fileName, failedFiles, h2oFrame, orcFileReader); + + if (h2oFrame != null) // delete frame after done. + h2oFrame.delete(); + + totalFilesTested++; + + } catch (IOException e) { + e.printStackTrace(); + failedFiles.add(fileName); + numberWrong++; + } + + } else { + Log.warn("The following file was not found: " + fileName); + failedFiles.add(fileName); + numberWrong++; + } + } + + if (numberWrong > 0) { + Log.warn("There are errors in your test."); + assertEquals("Number of orc files failed to parse is: " + numberWrong + ", failed files = " + + failedFiles.toString(), 0, numberWrong); + } else { + Log.info("Parser test passed! Number of files parsed is " + totalFilesTested); + } + } + + /** + * This method will take one H2O frame generated by the Orc parser and the fileName of the Orc file + * and attempt to compare the content of the Orc file to the H2O frame. In particular, the following + * are compared: + * - column names; + * - number of columns and rows; + * - content of each row. + * + * If all comparison pass, the test will pass. Otherwise, the test will fail. 
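Both this test and OrcParser.deriveParseSetup size the include mask as column count plus one and set flags at index + 1; the extra leading slot presumably corresponds to the ORC root struct, which is an assumption and not stated in the patch. A compact sketch of the convention:

    // Include-mask convention used above: slot 0 stays false, column i maps to slot i + 1.
    int nCols = 3;                                    // hypothetical column count
    boolean[] toInclude = new boolean[nCols + 1];
    for (int i = 0; i < nCols; i++)
      toInclude[i + 1] = true;                        // include every supported column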
+ * + * @param h2oFrame + * @param orcReader + */ + private void compareH2OFrame(String fileName, Set failedFiles, Frame h2oFrame, Reader orcReader) { + // grab column names, column and row numbers + StructObjectInspector insp = (StructObjectInspector) orcReader.getObjectInspector(); + List allColInfo = (List) insp.getAllStructFieldRefs(); // get info of all cols + + // compare number of columns and rows + int allColNumber = allColInfo.size(); // get and check column number + boolean[] toInclude = new boolean[allColNumber+1]; + + int colNumber = 0 ; + int index1 = 0; + for (StructField oneField:allColInfo) { + String colType = oneField.getFieldObjectInspector().getTypeName(); + + if (colType.toLowerCase().contains("decimal")) + colType = "decimal"; + + if (isSupportedSchema(colType)) { + toInclude[index1 + 1] = true; + colNumber++; + } + + index1++; + } + + assertEquals("Number of columns need to be the same: ", colNumber, h2oFrame.numCols()); + + // compare column names + String[] colNames = new String[colNumber]; + String[] colTypes = new String[colNumber]; + int colIndex = 0; + + for (int index = 0; index < allColNumber; index++) { // get and check column names + String typeName = allColInfo.get(index).getFieldObjectInspector().getTypeName(); + + if (typeName.toLowerCase().contains("decimal")) + typeName = "decimal"; + + if (isSupportedSchema(typeName)) { + colNames[colIndex] = allColInfo.get(index).getFieldName(); + colTypes[colIndex] = typeName; + colIndex++; + } + } + assertArrayEquals("Column names need to be the same: ", colNames, h2oFrame._names); + + // compare one column at a time of the whole row? + compareFrameContents(fileName, failedFiles, h2oFrame, orcReader, colTypes, colNames, toInclude); + + Long totalRowNumber = orcReader.getNumberOfRows(); // get and check row number + assertEquals("Number of rows need to be the same: ", totalRowNumber, (Long) h2oFrame.numRows()); + + } + + + private void compareFrameContents(String fileName, Set failedFiles, Frame h2oFrame, Reader orcReader, + String[] colTypes, String[] colNames, boolean[] toInclude) { + // prepare parameter to read a orc file. +// boolean[] toInclude = new boolean[colNumber+1]; // must equal to number of column+1 +// Arrays.fill(toInclude, true); + + List stripesInfo = orcReader.getStripes(); // get all stripe info + + if (stripesInfo.size() == 0) { // Orc file contains no data + assertEquals("Orc file is empty. 
H2O frame row number should be zero: ", 0, h2oFrame.numRows()); + } else { + Long startRowIndex = 0L; // row index into H2O frame + for (StripeInformation oneStripe : stripesInfo) { + try { + RecordReader perStripe = orcReader.rows(oneStripe.getOffset(), oneStripe.getDataLength(), toInclude, null, + colNames); + VectorizedRowBatch batch = perStripe.nextBatch(null); // read orc file stripes in vectorizedRowBatch + + boolean done = false; + Long rowCounts = 0L; + Long rowNumber = oneStripe.getNumberOfRows(); // row number of current stripe + + while (!done) { + long currentBatchRow = batch.count(); // row number of current batch + + ColumnVector[] dataVectors = batch.cols; + + int colIndex = 0; + for (int cIdx = 0; cIdx < batch.numCols; cIdx++) { // read one column at a time; + if (toInclude[cIdx+1]) { + compare1Cloumn(dataVectors[cIdx], colTypes[colIndex].toLowerCase(), colIndex, currentBatchRow, + h2oFrame.vec(colNames[colIndex]), startRowIndex); + colIndex++; + } + } + + rowCounts = rowCounts + currentBatchRow; // record number of rows of data actually read + startRowIndex = startRowIndex + currentBatchRow; + + if (rowCounts >= rowNumber) // read all rows of the stripe already. + done = true; + + if (!done) // not done yet, get next batch + batch = perStripe.nextBatch(batch); + } + + perStripe.close(); + } catch (Throwable e) { + numberWrong++; + failedFiles.add(fileName); + e.printStackTrace(); + // assertEquals("Test failed! ", true, false); + } + } + } + } + + private void compare1Cloumn(ColumnVector oneColumn, String columnType, int cIdx, long currentBatchRow, + Vec h2oColumn, Long startRowIndex) { + +// if (columnType.contains("bigint")) // cannot handle big integer right now +// return; + + if (columnType.contains("binary")) // binary retrieval problem. Tomas + return; + + switch (columnType) { + case "boolean": + case "bigint": // FIXME: not working right now + case "int": + case "smallint": + case "tinyint": + CompareLongcolumn(oneColumn, oneColumn.isNull, currentBatchRow, h2oColumn, startRowIndex); + break; + case "float": + case "double": + compareDoublecolumn(oneColumn, oneColumn.isNull, currentBatchRow, h2oColumn, startRowIndex); + break; + case "string": //FIXME: not working right now + case "varchar": + case "char": + case "binary": //FIXME: only reading it as string right now. + compareStringcolumn(oneColumn, oneColumn.isNull, currentBatchRow, h2oColumn, startRowIndex, columnType); + break; + case "timestamp": + case "date": + compareTimecolumn(oneColumn, columnType, oneColumn.isNull, currentBatchRow, h2oColumn, startRowIndex); + break; + case "decimal": + compareDecimalcolumn(oneColumn, oneColumn.isNull, currentBatchRow, h2oColumn, startRowIndex); + break; + default: + Log.warn("String, bigint are not tested. 
H2O frame is built for them but cannot be verified."); + } + } + + private void compareDecimalcolumn(ColumnVector oneDecimalColumn, boolean[] isNull, + long currentBatchRow, Vec h2oFrame, Long startRowIndex) { + HiveDecimalWritable[] oneColumn= ((DecimalColumnVector) oneDecimalColumn).vector; + long frameRowIndex = startRowIndex; + + for (int rowIndex = 0; rowIndex < currentBatchRow; rowIndex++) { + if (isNull[rowIndex]) + assertEquals("Na is found: ", true, h2oFrame.isNA(frameRowIndex)); + else + assertEquals("Decimal elements should equal: ", Double.parseDouble(oneColumn[rowIndex].toString()), + h2oFrame.at(frameRowIndex), EPSILON); + + frameRowIndex++; + } + } + + private void compareTimecolumn(ColumnVector oneTSColumn, String columnType, boolean[] isNull, long currentBatchRow, + Vec h2oFrame, Long startRowIndex) { + long[] oneColumn = ((LongColumnVector) oneTSColumn).vector; + long frameRowIndex = startRowIndex; + + for (int rowIndex = 0; rowIndex < currentBatchRow; rowIndex++) { + if (isNull[rowIndex]) + assertEquals("Na is found: ", true, h2oFrame.isNA(frameRowIndex)); + else { + if (columnType.contains("timestamp")) + assertEquals("Numerical elements should equal: ", oneColumn[rowIndex]/1000000, h2oFrame.at8(frameRowIndex), + ERRORMARGIN); + else + assertEquals("Numerical elements should equal: ", correctTimeStamp(oneColumn[rowIndex]), + h2oFrame.at8(frameRowIndex), ERRORMARGIN); + } + + frameRowIndex++; + } + } + + private void compareStringcolumn(ColumnVector oneStringColumn, boolean[] isNull, + long currentBatchRow, Vec h2oFrame, Long startRowIndex, String columnType) { + byte[][] oneColumn = ((BytesColumnVector) oneStringColumn).vector; + int[] stringLength = ((BytesColumnVector) oneStringColumn).length; + int[] stringStart = ((BytesColumnVector) oneStringColumn).start; + long frameRowIndex = startRowIndex; + + for (int rowIndex = 0; rowIndex < currentBatchRow; rowIndex++) { + if (isNull[rowIndex]) + assertEquals("Na is found: ", true, h2oFrame.isNA(frameRowIndex)); + else { + if (!oneStringColumn.isRepeating || rowIndex == 0) + tempOrc.set(oneColumn[rowIndex], stringStart[rowIndex], stringLength[rowIndex]); + h2oFrame.atStr(h2o, frameRowIndex); + assertEquals("isRepeating = " + oneStringColumn.isRepeating + " String/char elements should equal: ", true, tempOrc.equals(h2o)); + } + + frameRowIndex++; + } + } + + private void compareDoublecolumn(ColumnVector oneDoubleColumn, boolean[] isNull, + long currentBatchRow, Vec h2oFrame, Long startRowIndex) { + double[] oneColumn= ((DoubleColumnVector) oneDoubleColumn).vector; + long frameRowIndex = startRowIndex; + + for (int rowIndex = 0; rowIndex < currentBatchRow; rowIndex++) { + if (isNull[rowIndex]) + assertEquals("Na is found: ", true, h2oFrame.isNA(frameRowIndex)); + else + assertEquals("Numerical elements should equal: ", oneColumn[rowIndex], h2oFrame.at(frameRowIndex), EPSILON); + + frameRowIndex++; + } + } + + private void CompareLongcolumn(ColumnVector oneLongColumn, boolean[] isNull, + long currentBatchRow, Vec h2oFrame, Long startRowIndex) { + long[] oneColumn= ((LongColumnVector) oneLongColumn).vector; + long frameRowIndex = startRowIndex; + + for (int rowIndex = 0; rowIndex < currentBatchRow; rowIndex++) { + if (isNull[rowIndex]) + assertEquals("Na is found: ", true, h2oFrame.isNA(frameRowIndex)); + else { + if (h2oFrame.isNA(frameRowIndex)) + continue; + else + assertEquals("Numerical elements should equal: ", oneColumn[rowIndex], h2oFrame.at8(frameRowIndex)); + } + + frameRowIndex++; + } + } + + private long 
correctTimeStamp(long daysSinceEpoch) { + long timestamp = (daysSinceEpoch*DAY_TO_MS+ADD_OFFSET); + + DateTime date = new DateTime(timestamp); + + int hour = date.hourOfDay().get(); + + if (hour == 0) + return timestamp; + else + return (timestamp-hour*HOUR_OFFSET); + } +} \ No newline at end of file diff --git a/h2o-parsers/h2o-orc-parser/testMultiNode.sh b/h2o-parsers/h2o-orc-parser/testMultiNode.sh new file mode 100755 index 000000000000..093991e0d23d --- /dev/null +++ b/h2o-parsers/h2o-orc-parser/testMultiNode.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +# Argument parsing +if [ "$1" = "jacoco" ] +then + JACOCO_ENABLED=true +else + JACOCO_ENABLED=false +fi + +# Clean out any old sandbox, make a new one +OUTDIR=sandbox +rm -fr $OUTDIR; mkdir -p $OUTDIR + +# Check for os +SEP=: +case "`uname`" in + CYGWIN* ) + SEP=";" + ;; +esac + +function cleanup () { + kill -9 ${PID_1} ${PID_2} ${PID_3} ${PID_4} 1> /dev/null 2>&1 + wait 1> /dev/null 2>&1 + RC=`cat $OUTDIR/status.0` + if [ $RC -ne 0 ]; then + cat $OUTDIR/out.0 + echo h2o-orc-parser junit tests FAILED + else + echo h2o-orc-parser junit tests PASSED + fi + exit $RC +} + +trap cleanup SIGTERM SIGINT + +# Find java command +if [ -z "$TEST_JAVA_HOME" ]; then + # Use default + JAVA_CMD="java" +else + # Use test java home + JAVA_CMD="$TEST_JAVA_HOME/bin/java" + # Increase XMX since JAVA_HOME can point to java6 + JAVA6_REGEXP=".*1\.6.*" + if [[ $TEST_JAVA_HOME =~ $JAVA6_REGEXP ]]; then + JAVA_CMD="${JAVA_CMD}" + fi +fi + +MAX_MEM="-Xmx3g" + +# Check if coverage should be run +if [ $JACOCO_ENABLED = true ] +then + AGENT="../../jacoco/jacocoagent.jar" + COVERAGE="-javaagent:$AGENT=destfile=build/jacoco/h2o-parser_orc.exec" + MAX_MEM="-Xmx3g" +else + COVERAGE="" +fi + +# Gradle puts files: +# build/classes/main - Main h2o core classes +# build/classes/test - Test h2o core classes +# build/resources/main - Main resources (e.g. page.html) +JVM="nice $JAVA_CMD -ea $COVERAGE $MAX_MEM -Xms3g -cp build/libs/h2o-orc-parser-test.jar${SEP}build/libs/h2o-orc-parser.jar${SEP}../../h2o-core/build/libs/h2o-core-test.jar${SEP}../../h2o-core/build/libs/h2o-core.jar${SEP}../../h2o-genmodel/build/libs/h2o-genmodel.jar${SEP}../../lib/*" + +echo "$JVM" > $OUTDIR/jvm_cmd.txt +# Ahhh... but the makefile runs the tests skipping the jar'ing step when possible. +# Also, sometimes see test files in the main-class directory, so put the test +# classpath before the main classpath. +#JVM="nice java -ea -cp build/classes/test${SEP}build/classes/main${SEP}../h2o-core/build/classes/test${SEP}../h2o-core/build/classes/main${SEP}../lib/*" + +# Tests +# Must run first, before the cloud locks (because it tests cloud locking) +JUNIT_TESTS_BOOT="" +JUNIT_TESTS_BIG="" + +# Runner +# Default JUnit runner is org.junit.runner.JUnitCore +JUNIT_RUNNER="water.junit.H2OTestRunner" + +# find all java in the src/test directory +# Cut the "./water/MRThrow.java" down to "water/MRThrow.java" +# Cut the "water/MRThrow.java" down to "water/MRThrow" +# Slash/dot "water/MRThrow" becomes "water.MRThrow" + +# On this h2o-algos testMultiNode.sh only, force the tests.txt to be in the same order for all machines. +# If sorted, the result of the cd/grep varies by machine. +# If randomness is desired, replace sort with the unix 'shuf' +# Use /usr/bin/sort because of cygwin on windows. +# Windows has sort.exe which you don't want. Fails? (is it a lineend issue) +(cd src/test/java; /usr/bin/find . 
-name '*.java' | cut -c3- | sed 's/.....$//' | sed -e 's/\//./g') | grep -v $JUNIT_TESTS_BOOT | grep -v $JUNIT_TESTS_BIG | /usr/bin/sort > $OUTDIR/tests.txt + +# Output the comma-separated list of ignored/dooonly tests +# Ignored tests trump do-only tests +echo $IGNORE > $OUTDIR/tests.ignore.txt +echo $DOONLY > $OUTDIR/tests.doonly.txt + +# Launch 4 helper JVMs. All output redir'd at the OS level to sandbox files. +CLUSTER_NAME=junit_cluster_$$ +CLUSTER_BASEPORT=44000 +$JVM water.H2O -name $CLUSTER_NAME -baseport $CLUSTER_BASEPORT -ga_opt_out 1> $OUTDIR/out.1 2>&1 & PID_1=$! +$JVM water.H2O -name $CLUSTER_NAME -baseport $CLUSTER_BASEPORT -ga_opt_out 1> $OUTDIR/out.2 2>&1 & PID_2=$! +$JVM water.H2O -name $CLUSTER_NAME -baseport $CLUSTER_BASEPORT -ga_opt_out 1> $OUTDIR/out.3 2>&1 & PID_3=$! +$JVM water.H2O -name $CLUSTER_NAME -baseport $CLUSTER_BASEPORT -ga_opt_out 1> $OUTDIR/out.4 2>&1 & PID_4=$! + +# If coverage is being run, then pass a system variable flag so that timeout limits are increased. +if [ $JACOCO_ENABLED = true ] +then + JACOCO_FLAG="-Dtest.jacocoEnabled=true" +else + JACOCO_FLAG="" +fi + +# Launch last driver JVM. All output redir'd at the OS level to sandbox files. +echo Running h2o-orc-parser junit tests... +($JVM -Ddoonly.tests=$DOONLY -Dbuild.id=$BUILD_ID -Dignore.tests=$IGNORE -Djob.name=$JOB_NAME -Dgit.commit=$GIT_COMMIT -Dgit.branch=$GIT_BRANCH -Dai.h2o.name=$CLUSTER_NAME -Dai.h2o.baseport=$CLUSTER_BASEPORT -Dai.h2o.ga_opt_out=yes $JACOCO_FLAG $JUNIT_RUNNER `cat $OUTDIR/tests.txt` 2>&1 ; echo $? > $OUTDIR/status.0) 1> $OUTDIR/out.0 2>&1 + +grep EXECUTION $OUTDIR/out.0 | sed -e "s/.*TEST \(.*\) EXECUTION TIME: \(.*\) (Wall.*/\2 \1/" | sort -gr | head -n 10 >> $OUTDIR/out.0 + +cleanup \ No newline at end of file diff --git a/h2o-persist-hdfs/build.gradle b/h2o-persist-hdfs/build.gradle index ef0cb0588aae..80787759147e 100644 --- a/h2o-persist-hdfs/build.gradle +++ b/h2o-persist-hdfs/build.gradle @@ -1,9 +1,13 @@ + description = "H2O Persist HDFS" dependencies { - compile project(":h2o-core") - compile('net.java.dev.jets3t:jets3t:0.6.1') - compile("org.apache.hadoop:hadoop-client:2.0.0-cdh4.3.0") { - transitive = true - } -} + compile project(":h2o-core") + compile('net.java.dev.jets3t:jets3t:0.6.1') + def hadoopVersion = project.hasProperty("doIncludeOrc") && project.doIncludeOrc == "true" ? + orcDefaultHadoopClientVersion : defaultHadoopClientVersion + compile("org.apache.hadoop:hadoop-client:$hadoopVersion") { + // Pull all dependencies to allow run directly from IDE or command line + transitive = true + } +} \ No newline at end of file diff --git a/h2o-persist-hdfs/src/main/java/water/persist/PersistHdfs.java b/h2o-persist-hdfs/src/main/java/water/persist/PersistHdfs.java index 9014ed46ee09..3f1166f99d32 100644 --- a/h2o-persist-hdfs/src/main/java/water/persist/PersistHdfs.java +++ b/h2o-persist-hdfs/src/main/java/water/persist/PersistHdfs.java @@ -12,6 +12,7 @@ import java.io.OutputStream; import java.net.SocketTimeoutException; import java.net.URI; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; import java.util.concurrent.Callable; @@ -29,6 +30,8 @@ import water.util.FileUtils; import water.util.Log; +import static water.fvec.FileVec.getPathForKey; + /** * HDFS persistence layer. */ @@ -38,12 +41,6 @@ public final class PersistHdfs extends Persist { /** Root path of HDFS */ private final Path _iceRoot; - // Returns String with path for given key. 
- private static String getPathForKey(Key k) { - final int off = k._kb[0]==Key.CHK ? Vec.KEY_PREFIX_LEN : 0; - return new String(k._kb,off,k._kb.length-off); - } - // Global HDFS initialization // FIXME: do not share it via classes, but initialize it by object static { @@ -145,13 +142,13 @@ public PersistHdfs(URI uri) { long end, start = System.currentTimeMillis(); final byte[] b = MemoryManager.malloc1(v._max); Key k = v._key; + long skip = k.isChunkKey() ? water.fvec.NFSFileVec.chunkOffset(k) : 0; final Path p = _iceRoot == null?new Path(getPathForKey(k)):new Path(_iceRoot, getIceName(v)); final long skip_ = skip; run(new Callable() { @Override public Object call() throws Exception { FileSystem fs = FileSystem.get(p.toUri(), CONF); - FSDataInputStream s = null; try { // fs.getDefaultBlockSize(p); @@ -324,6 +321,17 @@ public Key uriToKey(URI uri) throws IOException { return HDFSFileVec.make(fstatus[0].getPath().toString(), fstatus[0].getLen()); } + public static FileSystem getFS(String path) throws IOException { + try { + return getFS(new URI(path)); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + public static FileSystem getFS(URI uri) throws IOException { + return FileSystem.get(uri, PersistHdfs.CONF); + } + // Is there a bucket name without a trailing "/" ? private boolean isBareS3NBucketWithoutTrailingSlash(String s) { String s2 = s.toLowerCase(); diff --git a/h2o-py/h2o/expr.py b/h2o-py/h2o/expr.py index b512a3752edb..206fc1c22a54 100644 --- a/h2o-py/h2o/expr.py +++ b/h2o-py/h2o/expr.py @@ -321,7 +321,8 @@ def _fill_data(self, json): # token NaN, so the default python json decoder does not convert them # to math.nan. Do that now. else: - c['data'] = [float('nan') if x == "NaN" else x for x in c['data']] + if c['data'] and (len(c['data']) > 0): # orc file parse can return frame with zero rows + c['data'] = [float('nan') if x == "NaN" else x for x in c['data']] self._data[c.pop('label')] = c # Label used as the Key return self diff --git a/h2o-py/tests/pyunit_utils/utilsPY.py b/h2o-py/tests/pyunit_utils/utilsPY.py index 4e720ee21a7f..469512d827db 100644 --- a/h2o-py/tests/pyunit_utils/utilsPY.py +++ b/h2o-py/tests/pyunit_utils/utilsPY.py @@ -4,6 +4,12 @@ from builtins import range from past.builtins import basestring import sys, os + +try: # works with python 2.7 not 3 + from StringIO import StringIO +except: # works with python 3 + from io import StringIO + sys.path.insert(1, "../../") import h2o import imp @@ -2522,3 +2528,244 @@ def write_hyper_parameters_json(dir1, dir2, json_filename, hyper_parameters): # save hyper-parameter file in sandbox with open(os.path.join(dir2, json_filename), 'w') as test_file: json.dump(hyper_parameters, test_file) + + +def compare_frames(frame1, frame2, numElements, tol_time=0, tol_numeric=0, strict=False, compare_NA=True): + """ + This function will compare two H2O frames to make sure their dimension, and values in all cells are the same. + It will not compare the column names though. + + :param frame1: H2O frame to be compared + :param frame2: H2O frame to be compared + :param numElements: integer to denote number of rows to compare. Done to reduce compare time. + Set to 0 or negative number if you want to compare all elements. + :param tol_time: optional parameter to limit time value difference. + :param tol_numerica: optional parameter to limit numeric value difference. + :param strict: optional parameter to enforce strict comparison or not. If True, column type must + match in order to pass the test. 
+ :param compare_NA: optional parameter to compare NA or not. For csv file generated from orc file, the + NAs are represented as some other symbol but our CSV will not be able to parse it correctly as NA. + In this case, do not compare the number of NAs. + :return: boolean: True, the two frames are equal and False otherwise. + """ + + # check frame dimensions + rows1, cols1 = frame1.dim + rows2, cols2 = frame2.dim + + assert rows1 == rows2 and cols1 == cols2, "failed dim check! frame 1 rows:{0} frame 2 rows:{1} frame 1 cols:{2} " \ + "frame2 cols:{3}".format(rows1, rows2, cols1, cols2) + + na_frame1 = frame1.isna().sum() + na_frame2 = frame2.isna().sum() + + if compare_NA: # check number of missing values + assert na_frame1 == na_frame2, "failed numbers of NA check! Frame 1 NA number: {0}, frame 2 " \ + "NA number: {1}".format(na_frame1, na_frame2) + + # check column types are the same before proceeding to check each row content. + for col_ind in range(cols1): + + c1_key = frame1.columns[col_ind] + c2_key = frame2.columns[col_ind] + c2_type = frame2.types[c2_key] + c1_type = frame1.types[c1_key] + + print("###### Comparing column: {0} and column type is {1}.".format(col_ind, c1_type)) + + if strict: # every column type must match + assert c1_type == c2_type, "failed column type check! frame1 col type: {0}, frame2 col type: " \ + "{1}".format(c1_type, c2_type) + else: + if str(c2_type) == 'enum': # orc files do not have enum column type. We convert it here + frame1[col_ind].asfactor() + else: + assert c1_type == c2_type, "failed column type check! frame1 col type: {0}, frame2 col type: " \ + "{1}".format(c1_type, c2_type) + # compare string + if (str(c1_type) == 'string') or (str(c1_type) == 'enum'): + compareOneStringColumn(frame1, frame2, col_ind, rows1, numElements) + else: + if str(c2_type) == 'time': # compare time columns + compareOneNumericColumn(frame1, frame2, col_ind, rows1, tol_time, numElements) + else: + compareOneNumericColumn(frame1, frame2, col_ind, rows1, tol_numeric, numElements) + return True + + +def compareOneStringColumn(frame1, frame2, col_ind, rows, numElements): + """ + This function will compare two String columns of two H2O frames to make sure that they are the same. + + :param frame1: H2O frame to be compared + :param frame2: H2O frame to be compared + :param col_ind: integer denoting column index to compare the two frames + :param rows: integer denoting number of rows in the column + :param numElements: integer to denote number of rows to compare. Done to reduce compare time + :return: None. Will throw exceptions if comparison failed. + """ + + row_indices = list(range(rows)) + if numElements > 0: + random.shuffle(row_indices) + else: + numElements = rows + + for ele_ind in range(numElements): + row_ind = row_indices[ele_ind] + + val1 = frame1[row_ind, col_ind] + val2 = frame2[row_ind, col_ind] + + assert val1 == val2, "failed frame values check! frame1 value: {0}, frame2 value: {1} at row {2}, column " \ + "{3}".format(val1, val2, row_ind, col_ind) + + +def compareOneNumericColumn(frame1, frame2, col_ind, rows, tolerance, numElements): + """ + This function compares two numeric columns of two H2O frames to make sure that they are close. + + :param frame1: H2O frame to be compared + :param frame2: H2O frame to be compared + :param col_ind: integer denoting column index to compare the two frames + :param rows: integer denoting number of rows in the column + :param tolerance: double parameter to limit numerical value difference. 
+ :param numElements: integer to denote number of rows to compare. Done to reduce compare time. + :return: None. Will throw exceptions if comparison failed. + """ + + row_indices = [] + if numElements > 0: + row_indices = random.sample(xrange(rows),numElements) + else: + numElements = rows # Compare all elements + list(range(rows)) + + for ele_ind in range(numElements): + row_ind = row_indices[ele_ind] + + val1 = frame1[row_ind, col_ind] + val2 = frame2[row_ind, col_ind] + + if not(math.isnan(val1)) and not(math.isnan(val2)): # both frames contain valid elements + diff = abs(val1-val2) + assert diff <= tolerance, "failed frame values check! frame1 value = {0}, frame2 value = {1}, " \ + "at row {2}, column {3}. The difference is {4}.".format(val1, val2, row_ind, + col_ind, diff) + elif math.isnan(val1) and math.isnan(val2): # both frame contains missing values + continue + else: # something is wrong, one frame got a missing value while the other is fine. + assert 1 == 2, "failed frame values check! frame1 value {0}, frame2 value {1} at row {2}, " \ + "column {3}".format(val1, val2, row_ind, col_ind) + +import warnings + +def expect_warnings(filewithpath, warn_phrase="warn", warn_string_of_interest="warn", number_of_times=1): + """ + This function will execute a command to run and analyze the print outs of + running the command. The goal here is to capture any warnings that we may expect + out of running those commands. + + :param filewithpath: name of file to be parsed with path + :param warn_phrase: capture the warning header, sometimes it is warn or userwarn. + :param warn_string_of_interest: specific warning message string + :param number_of_times: number of warning lines we are expecting. + :return: True if warning was found and False otherwise + """ + + number_warngings = 0 + + buffer = StringIO() # redirect warning messages to string buffer for later analysis + sys.stderr = buffer + + frame = h2o.import_file(path=locate(filewithpath)) + + sys.stderr = sys.__stderr__ # redirect it back to stdout. + try: # for python 2.7 + if len(buffer.buflist) > 0: + for index in range(len(buffer.buflist)): + if (warn_phrase in buffer.buflist[index]) and (warn_string_of_interest in buffer.buflist[index]): + number_warngings = number_warngings+1 + except: # for python 3. + warns = buffer.getvalue() + + if (warn_phrase in warns) and (warn_string_of_interest in warns): + number_warngings = number_warngings+1 + + number_of_times = 1 + + if number_warngings >= number_of_times: + return True + else: + return False + + +def compare_frame_summary(frame1_summary, frame2_summary, compareNames=False, compareTypes=False): + """ + This method is written to compare the frame summary between two frames. + + :param frame1_summary: + :param frame2_summary: + :param compareNames: + :param compareTypes: + :return: + """ + + frame1_column_number = len(frame1_summary) + frame2_column_number = len(frame2_summary) + + assert frame1_column_number == frame2_column_number, "failed column number check! 
+def compare_frame_summary(frame1_summary, frame2_summary, compareNames=False, compareTypes=False):
+    """
+    This method compares the column summaries of two frames.
+
+    :param frame1_summary: column summary of the first frame, as returned by h2o.frame(frame_id)["frames"][0]["columns"]
+    :param frame2_summary: column summary of the second frame, in the same format
+    :param compareNames: if True, compare the column labels as well
+    :param compareTypes: if True, compare the column types as well
+    :return: None.  Will throw exceptions if the comparison failed.
+    """
+
+    frame1_column_number = len(frame1_summary)
+    frame2_column_number = len(frame2_summary)
+
+    assert frame1_column_number == frame2_column_number, "failed column number check!  Frame 1 column number: {0}," \
+                                                         "frame 2 column number: {1}".format(frame1_column_number,
+                                                                                             frame2_column_number)
+
+    for col_index in range(frame1_column_number):  # check summary for each column
+        for key_val in list(frame1_summary[col_index]):
+
+            if not(compareNames) and (str(key_val) == 'label'):
+                continue
+
+            if not(compareTypes) and (str(key_val) == 'type'):
+                continue
+
+            if str(key_val) == 'precision':     # skip comparing precision
+                continue
+
+            val1 = frame1_summary[col_index][key_val]
+            val2 = frame2_summary[col_index][key_val]
+
+            if isinstance(val1, list) or isinstance(val1, dict):
+                if isinstance(val1, dict):
+                    assert val1 == val2, "failed column summary comparison for column {0} and summary " \
+                                         "type {1}, frame 1 value is {2}, frame 2 value is " \
+                                         "{3}".format(col_index, str(key_val), val1, val2)
+                else:
+                    if len(val1) > 0:
+                        # find out whether the list elements are floats
+                        float_found = False
+
+                        for ind in range(len(val1)):
+                            if isinstance(val1[ind], float):
+                                float_found = True
+                                break
+
+                        if float_found:
+                            for ind in range(len(val1)):
+                                if str(val1[ind]) != 'NaN':
+                                    assert abs(val1[ind]-val2[ind]) < 1e-5, "failed column summary comparison for " \
+                                                                            "column {0} and summary type {1}, frame 1" \
+                                                                            " value is {2}, frame 2 value is " \
+                                                                            "{3}".format(col_index, str(key_val),
+                                                                                         val1[ind], val2[ind])
+                        else:
+                            assert val1 == val2, "failed column summary comparison for column {0} and summary" \
+                                                 " type {1}, frame 1 value is {2}, frame 2 value is " \
+                                                 "{3}".format(col_index, str(key_val), val1, val2)
+            else:
+                if isinstance(val1, float):
+                    assert abs(val1-val2) < 1e-5, "failed column summary comparison for column {0} and summary type " \
+                                                  "{1}, frame 1 value is {2}, frame 2 value is " \
+                                                  "{3}".format(col_index, str(key_val), val1, val2)
+                else:
+                    assert val1 == val2, "failed column summary comparison for column {0} and summary type " \
+                                         "{1}, frame 1 value is {2}, frame 2 value is " \
+                                         "{3}".format(col_index, str(key_val), val1, val2)
\ No newline at end of file
diff --git a/h2o-py/tests/testdir_hdfs/index.list b/h2o-py/tests/testdir_hdfs/index.list
index 7e34bea48c88..f3be85484759 100644
--- a/h2o-py/tests/testdir_hdfs/index.list
+++ b/h2o-py/tests/testdir_hdfs/index.list
@@ -1,3 +1,10 @@
 pyunit_INTERNAL_HDFS_basic.py
 pyunit_INTERNAL_HDFS_import_export.py
+pyunit_INTERNAL_HDFS_airlines_orc.py
+pyunit_INTERNAL_HDFS_hexdev_29_import_types_orc.py
+pyunit_INTERNAL_HDFS_iris_import_types_orc.py
+pyunit_INTERNAL_HDFS_milsongs_orc_large.py
+pyunit_INTERNAL_HDFS_orc_parser.py
+pyunit_INTERNAL_HDFS_prostate_orc.py
+pyunit_INTERNAL_HDFS_timestamp_date_orc.py
diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_airlines_orc.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_airlines_orc.py
new file mode 100644
index 000000000000..0f004f074dfc
--- /dev/null
+++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_airlines_orc.py
@@ -0,0 +1,81 @@
+from __future__ import print_function
+import sys
+sys.path.insert(1,"../../")
+import h2o
+import time
+from tests import pyunit_utils
+#----------------------------------------------------------------------
+# Purpose:  This test exercises the orc parser in HDFS with data files of
+# significant size split across multiple files.  Basically, we are testing
+# our multi-file parsing of Orc with big data sets.  This test is
+# copied over from Nidhi's R unit test.
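+# Note: the csv copy of this data set comes from Hive, which writes missing values
+# as '\N' by default; that is why the csv import below passes na_strings=['\\N'],
+# while the orc import needs no such hint.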
+#---------------------------------------------------------------------- + + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. + hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + numElements2Compare = 10 + tol_time = 200 + tol_numeric = 1e-5 + + hdfs_name_node = pyunit_utils.hadoop_namenode() + hdfs_orc_file = "/datasets/airlines_all_orc_parts" + hdfs_csv_file = "/datasets/air_csv_part" + + col_types = ['real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'enum', 'real', 'enum', 'real', + 'real', 'enum', 'real', 'real', 'enum', 'enum', 'real', 'enum', 'enum', 'real', 'real', 'real', + 'enum', 'enum', 'enum', 'enum', 'enum', 'enum', 'enum'] + + # import CSV file + print("Import airlines 116M dataset in original csv format from HDFS") + url_csv = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_csv_file) + + startcsv = time.time() + multi_file_csv = h2o.import_file(url_csv, na_strings=['\\N'], col_types=col_types) + endcsv = time.time() + + startcsv1 = time.time() + multi_file_csv1 = h2o.import_file(url_csv) + endcsv1 = time.time() + h2o.remove(multi_file_csv1) + + multi_file_csv.summary() + csv_summary = h2o.frame(multi_file_csv.frame_id)["frames"][0]["columns"] + + # import ORC file with same column types as CSV file + print("Import airlines 116M dataset in ORC format from HDFS") + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + + startorc1 = time.time() + multi_file_orc1 = h2o.import_file(url_orc) + endorc1 = time.time() + h2o.remove(multi_file_orc1) + + startorc = time.time() + multi_file_orc = h2o.import_file(url_orc, col_types=col_types) + endorc = time.time() + + multi_file_orc.summary() + orc_summary = h2o.frame(multi_file_orc.frame_id)["frames"][0]["columns"] + + print("************** CSV (without column type forcing) parse time is {0}".format(endcsv1-startcsv1)) + print("************** CSV (with column type forcing) parse time is {0}".format(endcsv-startcsv)) + print("************** ORC (without column type forcing) parse time is {0}".format(endorc1-startorc1)) + print("************** ORC (with column type forcing) parse time is {0}".format(endorc-startorc)) + + # compare frame read by orc by forcing column type, + pyunit_utils.compare_frame_summary(csv_summary, orc_summary) + + else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_baddata_orc.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_baddata_orc.py new file mode 100644 index 000000000000..178e402cbcfe --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_baddata_orc.py @@ -0,0 +1,44 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils +#---------------------------------------------------------------------- +# This test is used to verify if the orc parser warnings from backend is +# passed down to python client when parsing orc files with unsupported +# data types or bad data value. +#---------------------------------------------------------------------- + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. 
+ hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + hdfs_name_node = pyunit_utils.hadoop_namenode() + + hdfs_orc_file = "/datasets/orc_parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc" + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + assert pyunit_utils.expect_warnings(url_orc, "UserWarning:", "Skipping field:", 1),\ + "Expect warnings from orc parser for file "+url_orc+"!" + + hdfs_orc_file = "/datasets/orc_parser/orc/TestOrcFile.emptyFile.orc" + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + assert pyunit_utils.expect_warnings(url_orc, "UserWarning:", "Skipping field:", 1), \ + "Expect warnings from orc parser for file "+url_orc+"!" + + hdfs_orc_file = "/datasets/orc_parser/orc/nulls-at-end-snappy.orc" + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + assert pyunit_utils.expect_warnings(url_orc, "UserWarning:", "Skipping field:", 1), \ + "Expect warnings from orc parser for file "+url_orc+"!" + + else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_hexdev_29_import_types_orc.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_hexdev_29_import_types_orc.py new file mode 100644 index 000000000000..ed3d8c301e7f --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_hexdev_29_import_types_orc.py @@ -0,0 +1,45 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils +#---------------------------------------------------------------------- +# Verifying that Python can define features as categorical or continuous +# on import in HDFS. +#---------------------------------------------------------------------- + + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. + hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + hdfs_name_node = pyunit_utils.hadoop_namenode() + hdfs_orc_file = "/datasets/orc_parser/orc/hexdev_29.orc" + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + hdfs_csv_file = "/datasets/orc_parser/csv/hexdev_29.csv" + url_csv = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_csv_file) + + numElements2Compare = 0 + tol_time = 200 + tol_numeric = 1e-5 + + ctypes = ["enum"]*3 + h2oframe_csv = h2o.import_file(url_csv, col_types=ctypes) + h2oframe_orc = h2o.import_file(url_orc, col_types=ctypes) + + # compare the two frames + assert pyunit_utils.compare_frames(h2oframe_orc, h2oframe_csv, numElements2Compare, tol_time, tol_numeric, + True), "H2O frame parsed from orc and csv files are different!" 
+ + else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_import_folder_airline_05_orc_large.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_import_folder_airline_05_orc_large.py new file mode 100644 index 000000000000..a1b87c1fc4aa --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_import_folder_airline_05_orc_large.py @@ -0,0 +1,73 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils +#---------------------------------------------------------------------- +# This test will build a H2O frame from importing the bigdata/laptop/parser/orc/airlines_05p_orc_csv +# from and build another H2O frame from the multi-file orc parser using multiple orc files that are +# saved in the directory bigdata/laptop/parser/orc/airlines_05p_orc. It will compare the two frames +# to make sure they are equal. +#---------------------------------------------------------------------- + + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. + hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + hdfs_name_node = pyunit_utils.hadoop_namenode() + + hdfs_orc_file = "/datasets/orc_parser/air05_orc" + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + hdfs_csv_file = "/datasets/orc_parser/air05_csv" + url_csv = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_csv_file) + + startcsv = time.time() + multi_file_csv = h2o.import_file(hdfs_csv_file, na_strings=['\\N']) + endcsv = time.time() + + csv_type_dict = multi_file_csv.types + + multi_file_csv.summary() + csv_summary = h2o.frame(multi_file_csv.frame_id)["frames"][0]["columns"] + + col_ind_name = dict() + # change column types from real to enum according to multi_file_csv column types + for key_name in list(csv_type_dict): + col_ind = key_name.split('C') + new_ind = int(str(col_ind[1]))-1 + col_ind_name[new_ind] = key_name + + col_types = [] + for ind in range(len(col_ind_name)): + col_types.append(csv_type_dict[col_ind_name[ind]]) + + startorc1 = time.time() + multi_file_orc1 = h2o.import_file(url_orc) + endorc1 = time.time() + h2o.remove(multi_file_orc1) + + startorc = time.time() + multi_file_orc = h2o.import_file(url_orc,col_types=col_types) + endorc = time.time() + + multi_file_orc.summary() + orc_summary = h2o.frame(multi_file_orc.frame_id)["frames"][0]["columns"] + + print("************** CSV parse time is {0}".format(endcsv-startcsv)) + print("************** ORC (without column type forcing) parse time is {0}".format(endorc1-startorc1)) + print("************** ORC (with column type forcing) parse time is {0}".format(endorc-startorc)) + # compare frame read by orc by forcing column type, + pyunit_utils.compare_frame_summary(csv_summary, orc_summary) + else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_import_folder_orc.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_import_folder_orc.py new file mode 100644 index 000000000000..c679aa2ccb82 --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_import_folder_orc.py @@ -0,0 +1,46 @@ +from __future__ import 
print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils +#---------------------------------------------------------------------- +# test that h2o.import_file works on a directory of files! +#---------------------------------------------------------------------- + + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. + hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + hdfs_name_node = pyunit_utils.hadoop_namenode() + + tol_time = 200 # comparing in ms or ns + tol_numeric = 1e-5 # tolerance for comparing other numeric fields + numElements2Compare = 0 # choose number of elements per column to compare. Save test time. + + hdfs_csv_file = "/datasets/orc_parser/synthetic_perfect_separation_csv" + hdfs_orc_file = "/datasets/orc_parser/synthetic_perfect_separation_orc" + + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + url_csv = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_csv_file) + + + multi_file_csv = h2o.import_file(url_csv) + multi_file_orc = h2o.import_file(url_orc) + + # make sure orc multi-file and single big file create same H2O frame + assert pyunit_utils.compare_frames(multi_file_orc , multi_file_csv, numElements2Compare, tol_time, + tol_numeric,True), "H2O frame parsed from multiple orc and single orc " \ + "files are different!" + else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_iris_import_types_orc.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_iris_import_types_orc.py new file mode 100644 index 000000000000..1196ebb34881 --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_iris_import_types_orc.py @@ -0,0 +1,44 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils +#---------------------------------------------------------------------- +## Verifying that a user can change a column type to Enum if they like. +#---------------------------------------------------------------------- + + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. + hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + hdfs_name_node = pyunit_utils.hadoop_namenode() + + numElements2Compare = 100 + tol_time = 200 + tol_numeric = 1e-5 + + hdfs_orc_file = "/datasets/orc_parser/orc/iris.orc" + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + hdfs_csv_file = "/datasets/orc_parser/csv/iris.csv" + url_csv = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_csv_file) + + h2oframe_csv = h2o.import_file(url_csv) + data_types = ['real', 'real', 'real', 'real', 'enum'] + h2oframe_orc = h2o.import_file(url_orc, col_types = data_types) + + # compare the two frames + assert pyunit_utils.compare_frames(h2oframe_orc, h2oframe_csv, numElements2Compare, tol_time, tol_numeric, + True), "H2O frame parsed from orc and csv files are different!" 
+ else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_milsongs_orc_large.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_milsongs_orc_large.py new file mode 100644 index 000000000000..012bf88a43fe --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_milsongs_orc_large.py @@ -0,0 +1,45 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils +#---------------------------------------------------------------------- +# This test will build a H2O frame from importing the bigdata/laptop/parser/orc/milsongs_orc_csv +# from and build another H2O frame from the multi-file orc parser using multiple orc files that are +# saved in the directory bigdata/laptop/parser/orc/milsongs_orc. It will compare the two frames +# to make sure they are equal. +#---------------------------------------------------------------------- + + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. + hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + hdfs_name_node = pyunit_utils.hadoop_namenode() + hdfs_orc_file = "/datasets/orc_parser/milsongs_orc" + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + hdfs_csv_file = "/datasets/orc_parser/milsongs_csv" + url_csv = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_csv_file) + + multi_file_csv = h2o.import_file(url_csv) + multi_file_orc = h2o.import_file(url_orc) + + multi_file_csv.summary() + csv_summary = h2o.frame(multi_file_csv.frame_id)["frames"][0]["columns"] + + multi_file_orc.summary() + orc_summary = h2o.frame(multi_file_orc.frame_id)["frames"][0]["columns"] + + pyunit_utils.compare_frame_summary(csv_summary, orc_summary) + else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_orc_parser.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_orc_parser.py new file mode 100644 index 000000000000..41637dbaeb77 --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_orc_parser.py @@ -0,0 +1,60 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +from tests import pyunit_utils +#---------------------------------------------------------------------- +# Purpose: This test will test orc-parser in HDFS parsing multiple +# orc files collected by Tom K. +#---------------------------------------------------------------------- + + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. 
+ hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + numElements2Compare = 10 + tol_time = 200 + tol_numeric = 1e-5 + + hdfs_name_node = pyunit_utils.hadoop_namenode() + + allOrcFiles = ["/datasets/orc_parser/orc/TestOrcFile.columnProjection.orc", + "/datasets/orc_parser/orc/bigint_single_col.orc", + "/datasets/orc_parser/orc/TestOrcFile.emptyFile.orc", + "/datasets/orc_parser/orc/bool_single_col.orc", + "/datasets/orc_parser/orc/demo-11-zlib.orc", + "/datasets/orc_parser/orc/TestOrcFile.testDate1900.orc", + "/datasets/orc_parser/orc/demo-12-zlib.orc", + "/datasets/orc_parser/orc/TestOrcFile.testDate2038.orc", + "/datasets/orc_parser/orc/double_single_col.orc", + "/datasets/orc_parser/orc/TestOrcFile.testMemoryManagementV11.orc", + "/datasets/orc_parser/orc/float_single_col.orc", + "/datasets/orc_parser/orc/TestOrcFile.testMemoryManagementV12.orc", + "/datasets/orc_parser/orc/int_single_col.orc", + "/datasets/orc_parser/orc/TestOrcFile.testPredicatePushdown.orc", + "/datasets/orc_parser/orc/nulls-at-end-snappy.orc", + "/datasets/orc_parser/orc/TestOrcFile.testSnappy.orc", + "/datasets/orc_parser/orc/orc_split_elim.orc", + "/datasets/orc_parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc", + "/datasets/orc_parser/orc/TestOrcFile.testStripeLevelStats.orc", + "/datasets/orc_parser/orc/smallint_single_col.orc", + "/datasets/orc_parser/orc/string_single_col.orc", + "/datasets/orc_parser/orc/tinyint_single_col.orc", + "/datasets/orc_parser/orc/TestOrcFile.testWithoutIndex.orc"] + + + for fIndex in range(len(allOrcFiles)): + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, allOrcFiles[fIndex]) + tab_test = h2o.import_file(url_orc) + else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_prostate_orc.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_prostate_orc.py new file mode 100644 index 000000000000..76a1b1aac6dd --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_prostate_orc.py @@ -0,0 +1,48 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils +#---------------------------------------------------------------------- +# To verify that the orc parser is parsing correctly, we want to take a file we know (prostate_NA.csv), convert +# it to an Orc file (prostate_NA.orc) and build two H2O frames out of them. We compare them and verified that +# they are the same. +# +# Nidhi did this manually in Hive and verified that the parsing is correct. I am automating the test here. +# +#---------------------------------------------------------------------- + + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. + hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + hdfs_name_node = pyunit_utils.hadoop_namenode() + + tol_time = 200 # comparing in ms or ns + tol_numeric = 1e-5 # tolerance for comparing other numeric fields + numElements2Compare = 10 # choose number of elements per column to compare. Save test time. 
+ + hdfs_orc_file = "/datasets/orc_parser/orc/prostate_NA.orc" + hdfs_csv_file = "/datasets/orc_parser/csv/prostate_NA.csv" + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file) + url_csv = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_csv_file) + + h2oOrc = h2o.import_file(url_orc) + h2oCsv = h2o.import_file(url_csv) + + # compare the two frames + assert pyunit_utils.compare_frames(h2oOrc, h2oCsv, numElements2Compare, tol_time, tol_numeric), \ + "H2O frame parsed from orc and csv files are different!" + else: + raise EnvironmentError + + +if __name__ == "__main__": + pyunit_utils.standalone_test(hdfs_orc_parser) +else: + hdfs_orc_parser() \ No newline at end of file diff --git a/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_timestamp_date_orc.py b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_timestamp_date_orc.py new file mode 100644 index 000000000000..b56b3531a890 --- /dev/null +++ b/h2o-py/tests/testdir_hdfs/pyunit_INTERNAL_HDFS_timestamp_date_orc.py @@ -0,0 +1,57 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils +#---------------------------------------------------------------------- +# This test will parse orc files containing timestamp and date information into +# H2O frame. Next, it will take the .csv file generated from the orc file from +# Hive and parse into H2O frame. Finally, we compare the two frames and make sure +# that they are equal. +# +# We want to make sure that we are parsing the date and timestamp +# date correctly from an orc file. Thanks to Nidhi who has imported an orc file +# containing timestamp/date into spark and later into Hive and write it out as +# csv. +# +#---------------------------------------------------------------------- + +def hdfs_orc_parser(): + + # Check if we are running inside the H2O network by seeing if we can touch + # the namenode. + hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible() + + if hadoop_namenode_is_accessible: + hdfs_name_node = pyunit_utils.hadoop_namenode() + + tol_time = 200 # comparing in ms or ns + tol_numeric = 1e-5 # tolerance for comparing other numeric fields + numElements2Compare = 100 # choose number of elements per column to compare. Save test time. + + allOrcFiles = ["/datasets/orc_parser/orc/TestOrcFile.testDate1900.orc", + "/datasets/orc_parser/orc/TestOrcFile.testDate2038.orc", + "/datasets/orc_parser/orc/orc_split_elim.orc"] + + allCsvFiles = ["/datasets/orc_parser/csv/TestOrcFile.testDate1900.csv", + "/datasets/orc_parser/csv/TestOrcFile.testDate2038.csv", + "/datasets/orc_parser/csv/orc_split_elim.csv"] + + for fIndex in range(len(allOrcFiles)): + url_orc = "hdfs://{0}{1}".format(hdfs_name_node, allOrcFiles[fIndex]) + url_csv = "hdfs://{0}{1}".format(hdfs_name_node, allCsvFiles[fIndex]) + h2oOrc = h2o.import_file(url_orc) + h2oCsv = h2o.import_file(url_csv) + + # compare the two frames + assert pyunit_utils.compare_frames(h2oOrc, h2oCsv, numElements2Compare, tol_time, tol_numeric), \ + "H2O frame parsed from orc and csv files are different!" 
+    else:
+        raise EnvironmentError
+
+
+if __name__ == "__main__":
+    pyunit_utils.standalone_test(hdfs_orc_parser)
+else:
+    hdfs_orc_parser()
\ No newline at end of file
diff --git a/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser.py b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser.py
new file mode 100644
index 000000000000..f43d9b15c462
--- /dev/null
+++ b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser.py
@@ -0,0 +1,53 @@
+from builtins import str
+import sys
+sys.path.insert(1,"../../")
+import h2o
+from tests import pyunit_utils
+from random import randint
+
+"""
+This test takes all orc files collected by Tom K and tries to parse each of them into an H2O frame.
+The files cover the primitive ORC column types as well as compressed (zlib, snappy) and otherwise
+tricky files (empty file, nulls at end, stripe-level stats, and so on).  If every file parses
+successfully, the test passes; otherwise it fails.
+"""
+def orc_parser_test():
+    allOrcFiles = ["smalldata/parser/orc/TestOrcFile.columnProjection.orc",
+                   "smalldata/parser/orc/bigint_single_col.orc",
+                   "smalldata/parser/orc/TestOrcFile.emptyFile.orc",
+                   "smalldata/parser/orc/bool_single_col.orc",
+                   "smalldata/parser/orc/demo-11-zlib.orc",
+                   "smalldata/parser/orc/TestOrcFile.testDate1900.orc",
+                   "smalldata/parser/orc/demo-12-zlib.orc",
+                   "smalldata/parser/orc/TestOrcFile.testDate2038.orc",
+                   "smalldata/parser/orc/double_single_col.orc",
+                   "smalldata/parser/orc/TestOrcFile.testMemoryManagementV11.orc",
+                   "smalldata/parser/orc/float_single_col.orc",
+                   "smalldata/parser/orc/TestOrcFile.testMemoryManagementV12.orc",
+                   "smalldata/parser/orc/int_single_col.orc",
+                   "smalldata/parser/orc/TestOrcFile.testPredicatePushdown.orc",
+                   "smalldata/parser/orc/nulls-at-end-snappy.orc",
+                   "smalldata/parser/orc/TestOrcFile.testSnappy.orc",
+                   "smalldata/parser/orc/orc_split_elim.orc",
+                   "smalldata/parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc",
+                   "smalldata/parser/orc/TestOrcFile.testStripeLevelStats.orc",
+                   "smalldata/parser/orc/smallint_single_col.orc",
+                   "smalldata/parser/orc/string_single_col.orc",
+                   "smalldata/parser/orc/tinyint_single_col.orc",
+                   "smalldata/parser/orc/TestOrcFile.testWithoutIndex.orc"]
+
+    for fIndex in range(len(allOrcFiles)):
+        # simply make sure every orc file in the list can be imported without error
+        tab_test = h2o.import_file(path=pyunit_utils.locate(allOrcFiles[fIndex]))
+
+
+if __name__ == "__main__":
+    pyunit_utils.standalone_test(orc_parser_test)
+else:
+    orc_parser_test()
+
+
+
+
+
+
diff --git a/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_baddata.py b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_baddata.py
new file mode 100644
index 000000000000..7791bd055a7b
--- /dev/null
+++ b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_baddata.py
@@ -0,0 +1,27 @@
+import sys
+sys.path.insert(1,"../../")
+from tests import pyunit_utils
+
+def orc_parser_baddata():
+    """
+    This test verifies that orc parser warnings from the backend are passed down to the python client
+    when parsing orc files with unsupported data types or bad data values.
+
+    :return: None.  An assertion error is raised if the expected warnings are not captured.
+    """
+    fileWithPath = "smalldata/parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc"
+    assert pyunit_utils.expect_warnings(fileWithPath, "UserWarning:", "Skipping field:", 1), \
+        "Expect warnings from orc parser for file "+fileWithPath+"!"
+ + fileWithPath = "smalldata/parser/orc/TestOrcFile.emptyFile.orc" + assert pyunit_utils.expect_warnings(fileWithPath, "UserWarning:", "Skipping field:", 4), \ + "Expect warnings from orc parser for file "+fileWithPath+"!" + + fileWithPath = "smalldata/parser/orc/nulls-at-end-snappy.orc" + assert pyunit_utils.expect_warnings(fileWithPath, "UserWarning:", "Long.MIN_VALUE:", 1), \ + "Expect warnings from orc parser for file "+fileWithPath+"!" + +if __name__ == "__main__": + pyunit_utils.standalone_test(orc_parser_baddata) +else: + orc_parser_baddata() diff --git a/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_hexdev_29_import_types.py b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_hexdev_29_import_types.py new file mode 100644 index 000000000000..397779643022 --- /dev/null +++ b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_hexdev_29_import_types.py @@ -0,0 +1,30 @@ +import sys +sys.path.insert(1,"../../") +import h2o +from tests import pyunit_utils +################################################################################ +## +## Verifying that Python can define features as categorical or continuous on import +## +################################################################################ + + +def continuous_or_categorical(): + numElements2Compare = 0 + tol_time = 200 + tol_numeric = 1e-5 + + ctypes = ["enum"]*3 + h2oframe_csv = h2o.import_file(pyunit_utils.locate("smalldata/jira/hexdev_29.csv"), col_types=ctypes) + h2oframe_orc = h2o.import_file(pyunit_utils.locate("smalldata/parser/orc/hexdev_29.orc"), col_types=ctypes) + + # compare the two frames + assert pyunit_utils.compare_frames(h2oframe_orc, h2oframe_csv, numElements2Compare, tol_time, tol_numeric, True), \ + "H2O frame parsed from orc and csv files are different!" + + + +if __name__ == "__main__": + pyunit_utils.standalone_test(continuous_or_categorical) +else: + continuous_or_categorical() diff --git a/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder.py b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder.py new file mode 100644 index 000000000000..912ed6a4f2a1 --- /dev/null +++ b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder.py @@ -0,0 +1,30 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +from tests import pyunit_utils + +# test that h2o.import_file works on a directory of files! +def import_folder(): + + tol_time = 200 # comparing in ms or ns + tol_numeric = 1e-5 # tolerance for comparing other numeric fields + numElements2Compare = 0 # choose number of elements per column to compare. Save test time. + + multi_file_csv1 = h2o.import_file(path=pyunit_utils.locate("smalldata/parser/orc/synthetic_perfect_seperation_csv/balunbal.csv")) + multi_file_csv2 = h2o.import_file(path=pyunit_utils.locate("smalldata/parser/orc/synthetic_perfect_seperation_csv/unbalbal.csv")) + multi_file_orc = h2o.import_file(path=pyunit_utils.locate("smalldata/parser/orc/synthetic_perfect_separation")) + + # make sure orc multi-file and single big file create same H2O frame + try: + assert pyunit_utils.compare_frames(multi_file_orc , multi_file_csv1, numElements2Compare, tol_time, tol_numeric, + True), "H2O frame parsed from multiple orc and single orc files are different!" + except: + assert pyunit_utils.compare_frames(multi_file_orc , multi_file_csv2, numElements2Compare, tol_time, tol_numeric, + True), "H2O frame parsed from multiple orc and single orc files are different!" 
+ + +if __name__ == "__main__": + pyunit_utils.standalone_test(import_folder) +else: + import_folder() diff --git a/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder_airline_05p_large.py b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder_airline_05p_large.py new file mode 100644 index 000000000000..578c4f27db97 --- /dev/null +++ b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder_airline_05p_large.py @@ -0,0 +1,61 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +import time +from tests import pyunit_utils + + +def import_folder(): + """ + This test will build a H2O frame from importing the bigdata/laptop/parser/orc/airlines_05p_orc_csv + from and build another H2O frame from the multi-file orc parser using multiple orc files that are + saved in the directory bigdata/laptop/parser/orc/airlines_05p_orc. It will compare the two frames + to make sure they are equal. + :return: None if passed. Otherwise, an exception will be thrown. + """ + startcsv = time.time() + multi_file_csv = h2o.import_file(path=pyunit_utils.locate("bigdata/laptop/parser/orc/pubdev_3200/air05_csv"), + na_strings=['\\N']) + endcsv = time.time() + + csv_type_dict = multi_file_csv.types + + multi_file_csv.summary() + csv_summary = h2o.frame(multi_file_csv.frame_id)["frames"][0]["columns"] + + col_ind_name = dict() + # change column types from real to enum according to multi_file_csv column types + for key_name in list(csv_type_dict): + col_ind = key_name.split('C') + new_ind = int(str(col_ind[1]))-1 + col_ind_name[new_ind] = key_name + + col_types = [] + for ind in range(len(col_ind_name)): + col_types.append(csv_type_dict[col_ind_name[ind]]) + + startorc1 = time.time() + multi_file_orc1 = h2o.import_file(path=pyunit_utils.locate("bigdata/laptop/parser/orc/pubdev_3200/air05_orc")) + endorc1 = time.time() + h2o.remove(multi_file_orc1) + + startorc = time.time() + multi_file_orc = h2o.import_file(path=pyunit_utils.locate("bigdata/laptop/parser/orc/pubdev_3200/air05_orc"), + col_types=col_types) + endorc = time.time() + + multi_file_orc.summary() + orc_summary = h2o.frame(multi_file_orc.frame_id)["frames"][0]["columns"] + + print("************** CSV parse time is {0}".format(endcsv-startcsv)) + print("************** ORC (without column type forcing) parse time is {0}".format(endorc1-startorc1)) + print("************** ORC (with column type forcing) parse time is {0}".format(endorc-startorc)) + # compare frame read by orc by forcing column type, + pyunit_utils.compare_frame_summary(csv_summary, orc_summary) + + +if __name__ == "__main__": + pyunit_utils.standalone_test(import_folder) +else: + import_folder() diff --git a/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder_milsongs_large.py b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder_milsongs_large.py new file mode 100644 index 000000000000..adc9209e8ba2 --- /dev/null +++ b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_import_folder_milsongs_large.py @@ -0,0 +1,30 @@ +from __future__ import print_function +import sys +sys.path.insert(1,"../../") +import h2o +from tests import pyunit_utils + + +def import_folder(): + """ + This test will build a H2O frame from importing the bigdata/laptop/parser/orc/milsongs_orc_csv + from and build another H2O frame from the multi-file orc parser using multiple orc files that are + saved in the directory bigdata/laptop/parser/orc/milsongs_orc. 
It will compare the two frames + to make sure they are equal. + :return: None if passed. Otherwise, an exception will be thrown. + """ + multi_file_csv = h2o.import_file(path=pyunit_utils.locate("bigdata/laptop/parser/orc/milsongs_orc_csv")) + multi_file_orc = h2o.import_file(path=pyunit_utils.locate("bigdata/laptop/parser/orc/milsongs_orc")) + + multi_file_csv.summary() + csv_summary = h2o.frame(multi_file_csv.frame_id)["frames"][0]["columns"] + + multi_file_orc.summary() + orc_summary = h2o.frame(multi_file_orc.frame_id)["frames"][0]["columns"] + + pyunit_utils.compare_frame_summary(csv_summary, orc_summary) + +if __name__ == "__main__": + pyunit_utils.standalone_test(import_folder) +else: + import_folder() diff --git a/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_iris_import_types.py b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_iris_import_types.py new file mode 100644 index 000000000000..62b25ff2cd59 --- /dev/null +++ b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_iris_import_types.py @@ -0,0 +1,30 @@ +import sys +sys.path.insert(1,"../../") +import h2o +from tests import pyunit_utils +################################################################################ +## +## Verifying that a user can change a column type to Enum if they like. +## +################################################################################ + + +def continuous_or_categorical_orc(): + numElements2Compare = 100 + tol_time = 200 + tol_numeric = 1e-5 + + h2oframe_csv = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris.csv")) + data_types = ['real', 'real', 'real', 'real', 'enum'] + h2oframe_orc = h2o.import_file(pyunit_utils.locate("smalldata/parser/orc/iris.orc"), col_types = data_types) + + # compare the two frames + assert pyunit_utils.compare_frames(h2oframe_orc, h2oframe_csv, numElements2Compare, tol_time, tol_numeric, True), \ + "H2O frame parsed from orc and csv files are different!" + + + +if __name__ == "__main__": + pyunit_utils.standalone_test(continuous_or_categorical_orc) +else: + continuous_or_categorical_orc() diff --git a/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_prostate.py b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_prostate.py new file mode 100644 index 000000000000..c1322033539a --- /dev/null +++ b/h2o-py/tests/testdir_parser/pyunit_NOFEATURE_orc_parser_prostate.py @@ -0,0 +1,33 @@ +from builtins import str +import sys +sys.path.insert(1,"../../") +import h2o +from tests import pyunit_utils + +def orc_parser_timestamp_date(): + """ + To verify that the orc parser is parsing correctly, we want to take a file we know (prostate_NA.csv), convert + it to an Orc file (prostate_NA.orc) and build two H2O frames out of them. We compare them and verified that + they are the same. + + Nidhi did this manually in Hive and verified that the parsing is correct. I am automating the test here. + + :return: None + """ + + tol_time = 200 # comparing in ms or ns + tol_numeric = 1e-5 # tolerance for comparing other numeric fields + numElements2Compare = 10 # choose number of elements per column to compare. Save test time. + + h2oOrc = h2o.import_file(path=pyunit_utils.locate('smalldata/parser/orc/prostate_NA.orc')) + h2oCsv = h2o.import_file(path=pyunit_utils.locate('smalldata/parser/csv2orc/prostate_NA.csv')) + + # compare the two frames + assert pyunit_utils.compare_frames(h2oOrc, h2oCsv, numElements2Compare, tol_time, tol_numeric), \ + "H2O frame parsed from orc and csv files are different!" 
+ + +if __name__ == "__main__": + pyunit_utils.standalone_test(orc_parser_timestamp_date) +else: + orc_parser_timestamp_date() diff --git a/h2o-py/tests/testdir_parser/pyunit_orc_NOFEATURE_parser_timestamp_date.py b/h2o-py/tests/testdir_parser/pyunit_orc_NOFEATURE_parser_timestamp_date.py new file mode 100644 index 000000000000..49ddc3e6a3d5 --- /dev/null +++ b/h2o-py/tests/testdir_parser/pyunit_orc_NOFEATURE_parser_timestamp_date.py @@ -0,0 +1,49 @@ +from builtins import str +import sys +sys.path.insert(1,"../../") +import h2o +from tests import pyunit_utils + + +def orc_parser_timestamp_date(): + """ + This test will parse orc files containing timestamp and date information into + H2O frame. Next, it will take the .csv file generated from the orc file from + Hive and parse into H2O frame. Finally, we compare the two frames and make sure + that they are equal. + + We want to make sure that we are parsing the date and timestamp + date correctly from an orc file. Thanks to Nidhi who has imported an orc file + containing timestamp/date into spark and later into Hive and write it out as + csv. + + :return: None + """ + + tol_time = 200 # comparing in ms or ns + tol_numeric = 1e-5 # tolerance for comparing other numeric fields + numElements2Compare = 100 # choose number of elements per column to compare. Save test time. + + allOrcFiles = ["smalldata/parser/orc/TestOrcFile.testDate1900.orc", + "smalldata/parser/orc/TestOrcFile.testDate2038.orc", + "smalldata/parser/orc/orc_split_elim.orc"] + + allCsvFiles = ["smalldata/parser/orc/orc2csv/TestOrcFile.testDate1900.csv", + "smalldata/parser/orc/orc2csv/TestOrcFile.testDate2038.csv", + "smalldata/parser/orc/orc2csv/orc_split_elim.csv"] + + for fIndex in range(len(allOrcFiles)): + + h2oOrc = h2o.import_file(path=pyunit_utils.locate(allOrcFiles[fIndex])) + h2oCsv = h2o.import_file(path=pyunit_utils.locate(allCsvFiles[fIndex])) + + # compare the two frames + assert pyunit_utils.compare_frames(h2oOrc, h2oCsv, numElements2Compare, tol_time, tol_numeric), \ + "H2O frame parsed from orc and csv files are different!" + + +if __name__ == "__main__": + pyunit_utils.standalone_test(orc_parser_timestamp_date) +else: + orc_parser_timestamp_date() + diff --git a/h2o-r/tests/testdir_hdfs/runit_INTERNAL_HDFS_airlines_orc.R b/h2o-r/tests/testdir_hdfs/runit_INTERNAL_HDFS_airlines_orc.R new file mode 100644 index 000000000000..d32cb1198079 --- /dev/null +++ b/h2o-r/tests/testdir_hdfs/runit_INTERNAL_HDFS_airlines_orc.R @@ -0,0 +1,67 @@ +setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) +source("../../scripts/h2o-r-test-setup.R") +#---------------------------------------------------------------------- +# Purpose: This tests orc parser on multi-file parsing in HDFS. +#---------------------------------------------------------------------- + +# Check if we are running inside the H2O network by seeing if we can touch +# the namenode. 
+hdfs_name_node <- Sys.getenv(c("NAME_NODE")) +print(hdfs_name_node) + +#myIP <- H2O.IP +#myPort <- H2O.PORT + +hdfs_air_orc = "/datasets/airlines_all_orc_parts" +hdfs_air_original = "/datasets/airlines/airlines_all.csv" + +#h2o.init(ip=myIP, port=myPort, startH2O = FALSE) + +#---------------------------------------------------------------------- + +heading("BEGIN TEST") +check.hdfs_airorc <- function() { + + heading("Import airlines 116M dataset in original csv format ") + url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_air_original) + + print("************** csv parsing time: ") + ptm <- proc.time() + csv.hex <- h2o.importFile(url,destination_frame = "csv.hex") + timepassed = proc.time() - ptm + print(timepassed) + + n <- nrow(csv.hex) + print(paste("Imported n =", n, "rows from csv")) + + heading("Import airlines 116M dataset in ORC format ") + + #print("************** orc parsing time without forcing column types: ") + #ptm <- proc.time() + #orc2.hex <- h2o.importFolder(url,destination_frame = "dd2") + #timepassed = proc.time() - ptm + #print(timepassed) + #h2o.rm(orc2.hex) + + url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_air_orc) + print("************** orc parsing time: ") + ptm <- proc.time() + orc.hex <- h2o.importFile(url,destination_frame = "orc.hex",col.names = names(csv.hex), + col.types = c("Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Numeric", + "Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Enum","Numeric","Numeric","Numeric","Numeric" + ,"Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Enum")) + timepassed = proc.time() - ptm + print(timepassed) + + n <- nrow(orc.hex) + print(paste("Imported n =", n, "rows from orc")) + + + expect_equal(dim(orc.hex),dim(csv.hex)) + expect_equal(summary(orc.hex),summary(csv.hex)) + + h2o.rm(orc.hex) # remove file + h2o.rm(csv.hex) +} + +doTest("ORC multifile parse test", check.hdfs_airorc) \ No newline at end of file diff --git a/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser.R b/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser.R new file mode 100644 index 000000000000..f09e0d752f80 --- /dev/null +++ b/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser.R @@ -0,0 +1,40 @@ +setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) +source("../../scripts/h2o-r-test-setup.R") + +# This simple test is used to make sure that orc file parsing works across the REST +# API for R clients. 
+ +test.orc_parser <- function(){ + Options(warn=1) + # all orc files that Tom K has found + allOrcFiles = c("smalldata/parser/orc/TestOrcFile.columnProjection.orc", + "smalldata/parser/orc/bigint_single_col.orc", + "smalldata/parser/orc/TestOrcFile.emptyFile.orc", + "smalldata/parser/orc/bool_single_col.orc", + "smalldata/parser/orc/demo-11-zlib.orc", + "smalldata/parser/orc/TestOrcFile.testDate1900.orc", + "smalldata/parser/orc/demo-12-zlib.orc", + "smalldata/parser/orc/TestOrcFile.testDate2038.orc", + "smalldata/parser/orc/double_single_col.orc", + "smalldata/parser/orc/TestOrcFile.testMemoryManagementV11.orc", + "smalldata/parser/orc/float_single_col.orc", + "smalldata/parser/orc/TestOrcFile.testMemoryManagementV12.orc", + "smalldata/parser/orc/int_single_col.orc", + "smalldata/parser/orc/TestOrcFile.testPredicatePushdown.orc", + "smalldata/parser/orc/nulls-at-end-snappy.orc", + "smalldata/parser/orc/TestOrcFile.testSnappy.orc", + "smalldata/parser/orc/orc_split_elim.orc", + "smalldata/parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc", + "smalldata/parser/orc/TestOrcFile.testStripeLevelStats.orc", + "smalldata/parser/orc/smallint_single_col.orc", + "smalldata/parser/orc/string_single_col.orc", + "smalldata/parser/orc/tinyint_single_col.orc", + "smalldata/parser/orc/TestOrcFile.testWithoutIndex.orc") + + for (temp in 1:length(allOrcFiles)) { + h2oFrame = h2o.importFile(locate(allOrcFiles[temp])) + } + +} + +doTest("Orc parser Test", test.orc_parser ) diff --git a/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser_airlines_05p_large.R b/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser_airlines_05p_large.R new file mode 100644 index 000000000000..be6a9b0ebec4 --- /dev/null +++ b/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser_airlines_05p_large.R @@ -0,0 +1,46 @@ +setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) +source("../../scripts/h2o-r-test-setup.R") +################################################################################ +## +## This tests Orc multifile parser by comparing the summary of the original csv frame with the h2o parsed orc frame +## +################################################################################ + + +test.continuous.or.categorical <- function() { + + + original = h2o.importFile(locate("bigdata/laptop/airlines_all.05p.csv"),destination_frame = "original", + col.types=c("Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Enum","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Enum")) + print("************** csv parsing time: ") + ptm <- proc.time() + csv = h2o.importFile(locate("bigdata/laptop/parser/orc/pubdev_3200/air05_csv"),destination_frame = "csv",col.names = names(original), + col.types=c("Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Enum","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Enum")) + timepassed = proc.time() - ptm + print(timepassed) + + print("************** orc parsing time without forcing column types: ") + ptm <- proc.time() + orc2 = h2o.importFile(locate("bigdata/laptop/parser/orc/pubdev_3200/air05_orc"),destination_frame = "orc2",col.names = names(original)) + timepassed = proc.time()-ptm + print(timepassed) + 
h2o.rm(orc2) + + print("************** orc parsing time forcing same column types as csv: ") + ptm <- proc.time() + orc = h2o.importFile(locate("bigdata/laptop/parser/orc/pubdev_3200/air05_orc"),destination_frame = "orc",col.names = names(original), + col.types=c("Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Enum","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Enum","Enum")) + timepassed = proc.time()-ptm + + + print(timepassed) + + expect_equal(summary(csv),summary(original)) + + for(i in 1:ncol(csv)){ + print(i) + expect_equal(summary(csv[,i]),summary(orc[,i])) + } +} + +doTest("Test orc multifile parser", test.continuous.or.categorical) diff --git a/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser_baddata.R b/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser_baddata.R new file mode 100644 index 000000000000..5c7fcd5c91fd --- /dev/null +++ b/h2o-r/tests/testdir_parser/runit_NOFEATURE_orc_parser_baddata.R @@ -0,0 +1,28 @@ +setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) +source("../../scripts/h2o-r-test-setup.R") + +# This test is written to make sure that warnings from Orc Parser are passed to the R client. +# In particular, the first two Orc files contain unsupported column types. +# The third Orc file contains big integer values that are used by sentinel for H2O frame. + +test.orc_parser.bad_data <- function() { + options(warn=1) # make warnings to cause an error + + # These files contain unsupported data types + frame = h2o.importFile(locate("smalldata/parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc")) + expect_warning(h2o.importFile(locate("smalldata/parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc"))) + frame = h2o.importFile(locate("smalldata/parser/orc/TestOrcFile.emptyFile.orc")) + expect_warning(h2o.importFile(locate("smalldata/parser/orc/TestOrcFile.emptyFile.orc"))) + # This file contains big integer value Long.MIN_VALUE that is used for sentinel + frame = h2o.importFile(locate("smalldata/parser/orc/nulls-at-end-snappy.orc")) + expect_warning(h2o.importFile(locate("smalldata/parser/orc/nulls-at-end-snappy.orc"))) + +# b = warnings() # collect all warnings into a list +# print(length(b)) +# if (length(b) < 1) { +# browser() +# # throw("Not all warning messages are passed from Java to R client.") +# } +} + +doTest("Orc Parser: make sure warnings are passed to user.", test.orc_parser.bad_data) diff --git a/h2o-r/tests/testdir_parser/runit_orc_NOFEATURE_parser_milsongs_large.R b/h2o-r/tests/testdir_parser/runit_orc_NOFEATURE_parser_milsongs_large.R new file mode 100644 index 000000000000..11e79c2313fb --- /dev/null +++ b/h2o-r/tests/testdir_parser/runit_orc_NOFEATURE_parser_milsongs_large.R @@ -0,0 +1,33 @@ +setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) +source("../../scripts/h2o-r-test-setup.R") +################################################################################ +## +## This tests Orc multifile parser by comparing the summary of the original csv frame with the h2o parsed orc frame +## on milsongs dataset +################################################################################ + + +test.continuous <- function() { + + + original = h2o.importFile(locate("bigdata/laptop/milsongs/milsongs-train.csv.gz"),destination_frame = "original") + + csv = 
h2o.importFile(locate("bigdata/laptop/parser/orc/milsongs_orc_csv"),destination_frame = "csv",col.names = names(original), + col.types = c("Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric", + "Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric", + "Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric")) + + orc = h2o.importFolder(locate("bigdata/laptop/parser/orc/milsongs_orc"),pattern = "*_0",destination_frame = "orc",col.names = names(original), + col.types = c("Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric", + "Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric", + "Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric","Numeric")) + + expect_equal(dim(csv),dim(orc)) + + expect_equal(summary(csv),summary(original)) + + expect_equal(summary(csv),summary(orc)) + +} + +doTest("Test orc multifile parser", test.continuous) diff --git a/scripts/run.py b/scripts/run.py index 785353245479..3610ea8b1e0b 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -2051,7 +2051,7 @@ def usage(): print(" --excludelist A file containing a list of tests to NOT run.") print("") print(" --testgroup Test a group of tests by function:") - print(" pca, glm, kmeans, gbm, rf, deeplearning, algos, golden, munging") + print(" pca, glm, kmeans, gbm, rf, deeplearning, algos, golden, munging, parser") print("") print(" --testsize Sizes (and by extension length) of tests to run:") print(" s=small (seconds), m=medium (a minute or two), l=large (longer), x=xlarge (very big)") diff --git a/scripts/saveTableAsOrc.textile b/scripts/saveTableAsOrc.textile index 36b0403daa18..214420929b2f 100644 --- a/scripts/saveTableAsOrc.textile +++ b/scripts/saveTableAsOrc.textile @@ -1,8 +1,8 @@ set hive.execution.engine=mr; set mapreduce.map.memory.mb=5240; set mapreduce.reduce.memory.mb=5240; -set mapreduce.map.java.opts=-DAMYWANG_MAP=1 -Xmx4G -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps; -set mapreduce.reduce.java.opts=-DAMYWANG_REDUCE=1 -Xmx4G -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps; +set 
mapreduce.map.java.opts=-DAMYWANG_MAP=1 -Xmx4G -XX:PermSize=256m -XX:MaxPermSize=256m -XX:PrintGCDetails -XX:PrintGCTimeStamps; +set mapreduce.reduce.java.opts=-DAMYWANG_REDUCE=1 -Xmx4G -XX:PermSize=256m -XX:MaxPermSize=256m -XX:PrintGCDetails -XX:PrintGCTimeStamps; create table airlines_all_05p( Year INT, Month INT, @@ -37,10 +37,11 @@ IsArrDelayed STRING, IsDepDelayed STRING ) ROW FORMAT DELIMITED -FIELDS TERMINATED BY ',' -location '/apps/hive/warehouse/data/airlines_all_05p'; -load data inpath 'hdfs://mr-0xd6.0xdata.loc:8020/user/amy/airlines_all.05p.csv' into table airlines_all_05p; -create table orc_airlines_all_05p( +FIELDS TERMINATED BY ‘,’ +location ‘/apps/hive/warehouse/data/airlines_all_05p’; +load data inpath ‘hdfs://mr-0xd6.0xdata.loc:8020/user/amy/airlines_all.05p.csv’ into table airlines_all_05p; + +create table csv_airlines_all( Year INT, Month INT, DayofMonth INT, @@ -51,7 +52,7 @@ ArrTime INT, CRSArrTime INT, UniqueCarrier STRING, FlightNum INT, -TailNum INT, +TailNum STRING, ActualElapsedTime INT, CRSElapsedTime INT, AirTime INT, @@ -74,7 +75,151 @@ IsArrDelayed STRING, IsDepDelayed STRING ) ROW FORMAT DELIMITED -FIELDS TERMINATED BY ',' +FIELDS TERMINATED BY ‘,’ STORED AS ORC; INSERT OVERWRITE TABLE orc_airlines_all_05p select * from airlines_all_05p; +The following is from Nidhi: +CREATE EXTERNAL TABLE ta (a1 INT, a2 STRING, a3 STRING, a4 STRING, a5 INT, a6 STRING, a7 INT, a8 INT, a9 INT) +STORED AS ORC; + +LOAD DATA local INPATH ‘/home/wendy/demo-11-zlib.orc’ OVERWRITE INTO TABLE ta; +​ +select * from ta limit 3; +#OK +#1 M M Primary 500 Good 0 0 0 +#2 F M Primary 500 Good 0 0 0 +#3 M S Primary 500 Good 0 0 0 +​ +CREATE EXTERNAL TABLE aa (a1 INT, a2 STRING, a3 STRING, a4 STRING, a5 INT, a6 STRING, a7 INT, a8 INT, a9 INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ +STORED AS TEXTFILE +LOCATION ‘/user/wendy/from_hive’; +​ +select * from aa limit 3; +#OK +#NULL NULL NULL NULL NULL NULL NULL NULL NULL +#NULL NULL NULL NULL NULL NULL NULL NULL NULL +#NULL NULL NULL NULL NULL NULL NULL NULL NULL +​ +INSERT OVERWRITE TABLE aa SELECT * FROM ta; +​ +select * from aa limit 3; +#OK +#1 M M Primary 500 Good 0 0 0 +#2 F M Primary 500 Good 0 0 0 +#3 M S Primary 500 Good 0 0 0 +#Time taken: 0.079 seconds, Fetched: 3 row(s) + +Convert csv to orc: +CREATE EXTERNAL TABLE da (a1 INT, a2 INT, a3 INT, a4 INT, a5 INT, a6 INT, a7 DOUBLE, a8 DOUBLE, a9 INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ +STORED AS TEXTFILE; +LOAD DATA local INPATH ‘/home/nidhi/prostate.csv’ OVERWRITE INTO TABLE bb; +​ +select * from da limit 3; +​ +​ +CREATE EXTERNAL TABLE bb (a1 INT, a2 INT, a3 INT, a4 INT, a5 INT, a6 INT, a7 DOUBLE, a8 DOUBLE, a9 INT) +STORED AS ORC +LOCATION ‘/user/nidhi/from_hive_prna’; + +INSERT OVERWRITE TABLE bb +SELECT * +FROM da; +​ +select * from bb limit 3; + +create table for milsongs +create external table milsongs ( +c1 INT, +c2 DOUBLE, +c3 DOUBLE, +c4 DOUBLE, +c5 DOUBLE, +c6 DOUBLE, +c7 DOUBLE, +c8 DOUBLE, +c9 DOUBLE, +c10 DOUBLE, +c11 DOUBLE, +c12 DOUBLE, +c13 DOUBLE, +c14 DOUBLE, +c15 DOUBLE, +c16 DOUBLE, +c17 DOUBLE, +c18 DOUBLE, +c19 DOUBLE, +c20 DOUBLE, +c21 DOUBLE, +c22 DOUBLE, +c23 DOUBLE, +c24 DOUBLE, +c25 DOUBLE, +c26 DOUBLE, +c27 DOUBLE, +c28 DOUBLE, +c29 DOUBLE, +c30 DOUBLE, +c31 DOUBLE, +c32 DOUBLE, +c33 DOUBLE, +c34 DOUBLE, +c35 DOUBLE, +c36 DOUBLE, +c37 DOUBLE, +c38 DOUBLE, +c39 DOUBLE, +c40 DOUBLE, +c41 DOUBLE, +c42 DOUBLE, +c43 DOUBLE, +c44 DOUBLE, +c45 DOUBLE, +c46 DOUBLE, +c47 DOUBLE, +c48 DOUBLE, +c49 DOUBLE, +c50 DOUBLE, +c51 DOUBLE, +c52 DOUBLE, +c53 
DOUBLE, +c54 DOUBLE, +c55 DOUBLE, +c56 DOUBLE, +c57 DOUBLE, +c58 DOUBLE, +c59 DOUBLE, +c60 DOUBLE, +c61 DOUBLE, +c62 DOUBLE, +c63 DOUBLE, +c64 DOUBLE, +c65 DOUBLE, +c66 DOUBLE, +c67 DOUBLE, +c68 DOUBLE, +c69 DOUBLE, +c70 DOUBLE, +c71 DOUBLE, +c72 DOUBLE, +c73 DOUBLE, +c74 DOUBLE, +c75 DOUBLE, +c76 DOUBLE, +c77 DOUBLE, +c78 DOUBLE, +c79 DOUBLE, +c80 DOUBLE, +c81 DOUBLE, +c82 DOUBLE, +c83 DOUBLE, +c84 DOUBLE, +c85 DOUBLE, +c86 DOUBLE, +c87 DOUBLE, +c88 DOUBLE, +c89 DOUBLE, +c90 DOUBLE, +c91 DOUBLE) \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index 8ad4cf477edb..8d2aedcebf2b 100644 --- a/settings.gradle +++ b/settings.gradle @@ -17,6 +17,7 @@ include 'h2o-bindings' include 'h2o-test-integ' include 'h2o-test-accuracy' include 'h2o-avro-parser' +include 'h2o-orc-parser' // Reconfigure scala projects to support cross compilation // The following code will create two projects for each included item: @@ -30,7 +31,7 @@ scalaCrossCompile { // Make structure flat and avoid annoying dummy modules rootProject.children.each { project -> - if (project.name.equals("h2o-avro-parser")) { + if (project.name.equals("h2o-avro-parser") || project.name.equals("h2o-orc-parser")) { String projectDirName = "h2o-parsers/${project.name}" project.projectDir = new File(settingsDir, projectDirName) } @@ -39,16 +40,16 @@ rootProject.children.each { project -> // // Include Hadoop builds only if requested // -if (System.getProperty("user.name").equals("jenkins") - || System.getenv("BUILD_HADOOP") != null - || System.getenv("H2O_TARGET") != null) { +if (System.getProperty("user.name").equals("jenkins") + || System.getenv("BUILD_HADOOP") != null + || System.getenv("H2O_TARGET") != null) { // Default hadoop build targets def allTargets = [ "cdh5.2", "cdh5.3", "cdh5.4.2", "cdh5.5.3", "cdh5.6.0", "cdh5.7.0", "hdp2.1", "hdp2.2", "hdp2.3", "hdp2.4", "mapr3.1.1", "mapr4.0.1", "mapr5.0", "mapr5.1" - ] + ] // Compute targets def targets = System.getenv("H2O_TARGET") != null ? System.getenv("H2O_TARGET").split(",").collect { it.trim() } : allTargets // Include selected/all Hadoop targets @@ -60,4 +61,3 @@ if (System.getProperty("user.name").equals("jenkins") } } } -
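For reference, the csv-to-ORC conversions in the saveTableAsOrc.textile recipe above can also
be done outside of Hive.  A minimal PySpark sketch (not part of this patch; it assumes a
Spark 2.x session with ORC support and uses hypothetical paths):

    # sketch only: convert a Hive-exported csv (NULLs written as \N) into an ORC directory
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("csv2orc").getOrCreate()
    df = spark.read.csv("hdfs:///user/someuser/prostate_NA.csv",
                        header=False, inferSchema=True, nullValue="\\N")
    df.write.mode("overwrite").orc("hdfs:///user/someuser/prostate_NA_orc")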