Skip to content

Commit

Permalink
fixes #2452: Added a local ivarator cache dir configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
ivakegg committed Jun 28, 2024
1 parent 53a3ea8 commit bb4426e
Show file tree
Hide file tree
Showing 17 changed files with 72 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@
<!-- the zookeeper configuration (could be something like file:///opt/datawave/zookeeper-warehouse/conf/zoo.cfg, or the zookeeper list (somehost1:2181,somehost2:2181,…)) -->
<property name="zookeeperConfig" value="${datawave.query.logic.logics.BaseEventQuery.zookeeperConfig}" />

<!-- This is injected via BaseEventQueryConfiguration.java and configured via configuration properties -->
<!-- the list of directories in which the query service puts its ivarator caches (selection always made on query service side) -->
<property name="localIvaratorCacheDirConfigs" ref="baseEventQueryLocalIvaratorCacheDirConfigs" />

<!-- This is injected via BaseEventQueryConfiguration.java and configured via configuration properties -->
<!-- the list of directories in which the ivarator puts its caches (selection always made on tserver side) -->
<property name="ivaratorCacheDirConfigs" ref="baseEventQueryIvaratorCacheDirConfigs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,8 @@
<property name="hdfsSiteConfigURLs" value="${hdfs.site.config.urls}" />
<!-- the zookeeper configuration (could be something like file:///opt/datawave/zookeeper-warehouse/conf/zoo.cfg, or the zookeeper list (somehost1:2181,somehost2:2181,…)) -->
<property name="zookeeperConfig" value="${ivarator.zookeeper.hosts}" />
<!-- the list of directories in which the webserver puts its ivarator caches (selection always made on webserver side) -->
<property name="localIvaratorCacheDirConfigs" ref="LocalIvaratorCacheDirConfigs" />
<!-- the list of directories in which the ivarator puts its caches (selection always made on tserver side) -->
<property name="ivaratorCacheDirConfigs" ref="IvaratorCacheDirConfigs" />
<!-- the hdfs location where FST's will be stored (see maxOrExpansionFstThreshold above) -->
Expand Down Expand Up @@ -324,6 +326,10 @@
<property name="tfAggregationThresholdMs" value="-1" />
</bean>

<!-- List bean referenced by the localIvaratorCacheDirConfigs property (webserver-side ivarator caches). -->
<!-- The list body is the local.ivarator.cache.dir.config placeholder; presumably it is substituted with IvaratorCacheDirConfig bean definitions before Spring parses this file (verify against the build). -->
<util:list id="LocalIvaratorCacheDirConfigs">
${local.ivarator.cache.dir.config}
</util:list>

<!-- List bean referenced by the ivaratorCacheDirConfigs property (tserver-side ivarator caches). -->
<!-- The list body is the ivarator.cache.dir.config placeholder; presumably it is substituted with IvaratorCacheDirConfig bean definitions before Spring parses this file (verify against the build). -->
<util:list id="IvaratorCacheDirConfigs">
${ivarator.cache.dir.config}
</util:list>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,10 @@ hdfs.site.config.urls=file:///etc/hadoop/conf/core-site.xml,file:///etc/hadoop/c
## - minAvailableStoragePercent - (Optional, Default: 0.0) A double, between 0.0 and 1.0, which specifies the
## minimum percent of available storage space required to persist to this ivarator path. If less than this
## percent is available, we will not use this ivarator path.
## Webserver-side (local) ivarator cache directories: bean definitions placed inside the
## LocalIvaratorCacheDirConfigs list. The value below points at a local file:// path.
local.ivarator.cache.dir.config= \
<bean class="datawave.query.iterator.ivarator.IvaratorCacheDirConfig"> \
\n <constructor-arg value="file:///tmp/IvaratorCache" /> \
\n </bean>
ivarator.cache.dir.config= \
<bean class="datawave.query.iterator.ivarator.IvaratorCacheDirConfig"> \
\n <constructor-arg value="hdfs:///IvaratorCache" /> \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@
<!-- the zookeeper configuration (could be something like file:///opt/datawave/zookeeper-warehouse/conf/zoo.cfg, or the zookeeper list (somehost1:2181,somehost2:2181,…)) -->
<property name="zookeeperConfig" value="${accumulo.zookeepers}" />

<!-- This is injected via BaseEventQueryConfiguration.java and configured via configuration properties -->
<!-- the list of directories in which the query service puts its ivarator caches (selection always made on query service side) -->
<property name="localIvaratorCacheDirConfigs" ref="baseEventQueryLocalIvaratorCacheDirConfigs" />

<!-- This is injected via BaseEventQueryConfiguration.java and configured via configuration properties -->
<!-- the list of directories in which the ivarator puts its caches (selection always made on tserver side) -->
<property name="ivaratorCacheDirConfigs" ref="baseEventQueryIvaratorCacheDirConfigs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@
<property name="hdfsSiteConfigURLs" value="file:///etc/hadoop/conf/core-site.xml,file:///etc/hadoop/conf/hdfs-site.xml" />
<!-- the zookeeper configuration (could be something like file:///opt/datawave/zookeeper-warehouse/conf/zoo.cfg, or the zookeeper list (somehost1:2181,somehost2:2181,…)) -->
<property name="zookeeperConfig" value="" />
<!-- the list of directories in which the webserver puts its ivarator caches (selection always made on webserver side) -->
<property name="localIvaratorCacheDirConfigs" ref="LocalIvaratorCacheDirConfigs" />
<!-- the list of directories in which the ivarator puts its caches (selection always made on tserver side) -->
<property name="ivaratorCacheDirConfigs" ref="IvaratorCacheDirConfigs" />
<!-- the hdfs location where FST's will be stored (see maxOrExpansionFstThreshold above) -->
Expand Down Expand Up @@ -316,6 +318,12 @@
<property name="tfAggregationThresholdMs" value="-1" />
</bean>

<!-- Webserver-side (local) ivarator cache directories, referenced by the localIvaratorCacheDirConfigs property. -->
<!-- The id must be LocalIvaratorCacheDirConfigs: the tserver-side list in this file already owns the id IvaratorCacheDirConfigs, and bean ids must be unique within a file. -->
<util:list id="LocalIvaratorCacheDirConfigs">
    <bean class="datawave.query.iterator.ivarator.IvaratorCacheDirConfig">
        <constructor-arg value="file:///tmp/IvaratorCache" />
    </bean>
</util:list>

<util:list id="IvaratorCacheDirConfigs">
<bean class="datawave.query.iterator.ivarator.IvaratorCacheDirConfig">
<constructor-arg value="hdfs:///IvaratorCache" />
Expand Down
1 change: 1 addition & 0 deletions properties/bamboo.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#This is for the unit testing of spring configuration files by bamboo
accumulo.user.password=testdummy
local.ivarator.cache.dir.config=
ivarator.cache.dir.config=
ivarator.fst.hdfs.base.uris=
4 changes: 4 additions & 0 deletions properties/default.properties
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,10 @@ hdfs.site.config.urls=file:///etc/hadoop/conf/core-site.xml,file:///etc/hadoop/c
## - minAvailableStoragePercent - (Optional, Default: 0.0) A double, between 0.0 and 1.0, which specifies the
## minimum percent of available storage space required to persist to this ivarator path. If less than this
## percent is available, we will not use this ivarator path.
## Webserver-side (local) ivarator cache directories: bean definitions placed inside the
## LocalIvaratorCacheDirConfigs list. The value below points at a local file:// path.
local.ivarator.cache.dir.config= \
<bean class="datawave.query.iterator.ivarator.IvaratorCacheDirConfig"> \
\n <constructor-arg value="file:///tmp/IvaratorCache" /> \
\n </bean>
ivarator.cache.dir.config= \
<bean class="datawave.query.iterator.ivarator.IvaratorCacheDirConfig"> \
\n <constructor-arg value="hdfs:///IvaratorCache" /> \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,10 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
private String hdfsSiteConfigURLs = null;
private String hdfsFileCompressionCodec = null;
private String zookeeperConfig = null;
// tserver side ivarator cache dir configs
private List<IvaratorCacheDirConfig> ivaratorCacheDirConfigs = Collections.emptyList();
// webserver side ivarator cache dir configs (e.g. for unique transform on webserver)
private List<IvaratorCacheDirConfig> localIvaratorCacheDirConfigs = Collections.emptyList();
private String ivaratorFstHdfsBaseURIs = null;
private int ivaratorCacheBufferSize = 10000;

Expand Down Expand Up @@ -671,6 +674,8 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setHdfsSiteConfigURLs(other.getHdfsSiteConfigURLs());
this.setHdfsFileCompressionCodec(other.getHdfsFileCompressionCodec());
this.setZookeeperConfig(other.getZookeeperConfig());
this.setLocalIvaratorCacheDirConfigs(
null == other.getLocalIvaratorCacheDirConfigs() ? null : Lists.newArrayList(other.getLocalIvaratorCacheDirConfigs()));
this.setIvaratorCacheDirConfigs(null == other.getIvaratorCacheDirConfigs() ? null : Lists.newArrayList(other.getIvaratorCacheDirConfigs()));
this.setIvaratorFstHdfsBaseURIs(other.getIvaratorFstHdfsBaseURIs());
this.setIvaratorCacheBufferSize(other.getIvaratorCacheBufferSize());
Expand Down Expand Up @@ -766,6 +771,8 @@ public ShardQueryConfiguration(ShardQueryConfiguration other, Collection<QueryDa

this.setHdfsSiteConfigURLs(other.getHdfsSiteConfigURLs());
this.setHdfsFileCompressionCodec(other.getHdfsFileCompressionCodec());
this.setLocalIvaratorCacheDirConfigs(
null == other.getLocalIvaratorCacheDirConfigs() ? null : Lists.newArrayList(other.getLocalIvaratorCacheDirConfigs()));
this.setIvaratorCacheDirConfigs(null == other.getIvaratorCacheDirConfigs() ? null : Lists.newArrayList(other.getIvaratorCacheDirConfigs()));
this.setIvaratorFstHdfsBaseURIs(other.getIvaratorFstHdfsBaseURIs());

Expand Down Expand Up @@ -1486,6 +1493,14 @@ public void setIvaratorCacheDirConfigs(List<IvaratorCacheDirConfig> ivaratorCach
this.ivaratorCacheDirConfigs = ivaratorCacheDirConfigs;
}

/**
 * Sets the webserver-side ("local") ivarator cache directory configurations.
 *
 * @param localIvaratorCacheDirConfigs
 *            the list to store; the reference is kept as-is (no defensive copy is made here)
 */
public void setLocalIvaratorCacheDirConfigs(List<IvaratorCacheDirConfig> localIvaratorCacheDirConfigs) {
this.localIvaratorCacheDirConfigs = localIvaratorCacheDirConfigs;
}

/**
 * Returns the webserver-side ("local") ivarator cache directory configurations.
 *
 * @return the stored list itself, not a copy
 */
public List<IvaratorCacheDirConfig> getLocalIvaratorCacheDirConfigs() {
return localIvaratorCacheDirConfigs;
}

/**
 * Returns the configured ivarator FST HDFS base URIs.
 *
 * @return the raw string value of the {@code ivaratorFstHdfsBaseURIs} field
 */
public String getIvaratorFstHdfsBaseURIs() {
return ivaratorFstHdfsBaseURIs;
}
Expand Down Expand Up @@ -2951,6 +2966,7 @@ public boolean equals(Object o) {
Objects.equals(getHdfsSiteConfigURLs(), that.getHdfsSiteConfigURLs()) &&
Objects.equals(getHdfsFileCompressionCodec(), that.getHdfsFileCompressionCodec()) &&
Objects.equals(getZookeeperConfig(), that.getZookeeperConfig()) &&
Objects.equals(getLocalIvaratorCacheDirConfigs(), that.getLocalIvaratorCacheDirConfigs()) &&
Objects.equals(getIvaratorCacheDirConfigs(), that.getIvaratorCacheDirConfigs()) &&
Objects.equals(getIvaratorFstHdfsBaseURIs(), that.getIvaratorFstHdfsBaseURIs()) &&
Objects.equals(getQueryModel(), that.getQueryModel()) &&
Expand Down Expand Up @@ -3130,6 +3146,7 @@ public int hashCode() {
getHdfsSiteConfigURLs(),
getHdfsFileCompressionCodec(),
getZookeeperConfig(),
getLocalIvaratorCacheDirConfigs(),
getIvaratorCacheDirConfigs(),
getIvaratorFstHdfsBaseURIs(),
getIvaratorCacheBufferSize(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1548,7 +1548,7 @@ protected UniqueTransform getUniqueTransform() throws IOException {
.withUniqueFields(getUniqueFields())
.withQueryExecutionForPageTimeout(getResultTimeout())
.withBufferPersistThreshold(getUniqueCacheBufferSize())
.withIvaratorCacheDirConfigs(getIvaratorCacheDirConfigs())
.withLocalIvaratorCacheDirConfigs(getIvaratorCacheDirConfigs())
.withHdfsSiteConfigURLs(getHdfsSiteConfigURLs())
.withSubDirectory(getQueryId() + "-" + getScanId())
.withMaxOpenFiles(getIvaratorMaxOpenFiles())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ private void addConfigBasedTransformers() throws QueryException {
.withQueryExecutionForPageTimeout(this.getQueryExecutionForPageTimeout())
.withModel(getQueryModel())
.withBufferPersistThreshold(getUniqueCacheBufferSize())
.withIvaratorCacheDirConfigs(getIvaratorCacheDirConfigs())
.withLocalIvaratorCacheDirConfigs(getLocalIvaratorCacheDirConfigs())
.withHdfsSiteConfigURLs(getHdfsSiteConfigURLs())
.withSubDirectory(getConfig().getQuery().getId().toString())
.withMaxOpenFiles(getIvaratorMaxOpenFiles())
Expand Down Expand Up @@ -1963,6 +1963,14 @@ public void setFullTableScanEnabled(boolean fullTableScanEnabled) {
getConfig().setFullTableScanEnabled(fullTableScanEnabled);
}

/**
 * Sets the local ivarator cache directory configurations by delegating to the underlying configuration object.
 *
 * @param localIvaratorCacheDirConfigs
 *            the list passed through to {@code getConfig().setLocalIvaratorCacheDirConfigs(...)}
 */
public void setLocalIvaratorCacheDirConfigs(List<IvaratorCacheDirConfig> localIvaratorCacheDirConfigs) {
getConfig().setLocalIvaratorCacheDirConfigs(localIvaratorCacheDirConfigs);
}

/**
 * Returns the local ivarator cache directory configurations held by the underlying configuration object.
 *
 * @return whatever {@code getConfig().getLocalIvaratorCacheDirConfigs()} returns
 */
public List<IvaratorCacheDirConfig> getLocalIvaratorCacheDirConfigs() {
return getConfig().getLocalIvaratorCacheDirConfigs();
}

/**
 * Returns the ivarator cache directory configurations held by the underlying configuration object.
 *
 * @return whatever {@code getConfig().getIvaratorCacheDirConfigs()} returns
 */
public List<IvaratorCacheDirConfig> getIvaratorCacheDirConfigs() {
return getConfig().getIvaratorCacheDirConfigs();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ public Builder withBufferPersistThreshold(int bufferPersistThreshold) {
return this;
}

public Builder withIvaratorCacheDirConfigs(List<IvaratorCacheDirConfig> ivaratorCacheDirConfigs) {
/**
 * Records the ivarator cache directory configurations on this builder.
 *
 * @param ivaratorCacheDirConfigs
 *            the list to store in the builder's {@code ivaratorCacheDirConfigs} field (kept by reference)
 * @return this builder, to allow call chaining
 */
public Builder withLocalIvaratorCacheDirConfigs(List<IvaratorCacheDirConfig> ivaratorCacheDirConfigs) {
this.ivaratorCacheDirConfigs = ivaratorCacheDirConfigs;
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,8 @@ public void setUp() throws Exception {
updatedValues.put("hdfsFileCompressionCodec", "sunny");
defaultValues.put("zookeeperConfig", null);
updatedValues.put("zookeeperConfig", "file://etc/zookeeper/conf");
defaultValues.put("localIvaratorCacheDirConfigs", Collections.emptyList());
updatedValues.put("localIvaratorCacheDirConfigs", Lists.newArrayList(new IvaratorCacheDirConfig("file:///tmp/ivarators")));
defaultValues.put("ivaratorCacheDirConfigs", Collections.emptyList());
updatedValues.put("ivaratorCacheDirConfigs", Lists.newArrayList(new IvaratorCacheDirConfig("hdfs://instance-a/ivarators")));
defaultValues.put("ivaratorFstHdfsBaseURIs", null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public void setUp() throws IOException {
// setup a directory for cache results
File tmpDir = temporaryFolder.newFolder();
IvaratorCacheDirConfig config = new IvaratorCacheDirConfig(tmpDir.toURI().toString());
logic.setLocalIvaratorCacheDirConfigs(Collections.singletonList(config));
logic.setIvaratorCacheDirConfigs(Collections.singletonList(config));

QueryImpl query = new QueryImpl();
Expand All @@ -50,7 +51,7 @@ protected UniqueTransform getUniqueTransform() {
.withUniqueFields(uniqueFields)
.withQueryExecutionForPageTimeout(Long.MAX_VALUE)
.withBufferPersistThreshold(logic.getUniqueCacheBufferSize())
.withIvaratorCacheDirConfigs(logic.getIvaratorCacheDirConfigs())
.withLocalIvaratorCacheDirConfigs(logic.getLocalIvaratorCacheDirConfigs())
.withHdfsSiteConfigURLs(logic.getHdfsSiteConfigURLs())
.withSubDirectory(logic.getConfig().getQuery().getId().toString())
.withMaxOpenFiles(logic.getIvaratorMaxOpenFiles())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
<!-- The max value or list expansion for a single field using an FST ivarator -->
<property name="maxOrExpansionFstThreshold" value="20" /> <!-- was ${beq.orExpansionFstThreshold} -->
<property name="hdfsSiteConfigURLs" value="" /> <!-- was ${hdfs.site.config.urls} -->
<property name="localIvaratorCacheDirConfigs" ref="IvaratorCacheDirConfigs" /> <!-- was ${local.ivarator.cache.dir.config} -->
<property name="ivaratorCacheDirConfigs" ref="IvaratorCacheDirConfigs" /> <!-- was ${ivarator.cache.dir.config} -->
<property name="ivaratorCacheBufferSize" value="10000" />
<property name="ivaratorCacheScanPersistThreshold" value="100000" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@
<!-- The max value or list expansion for a single field using an FST ivarator -->
<property name="maxOrExpansionFstThreshold" value="20" /> <!-- was ${beq.orExpansionFstThreshold} -->
<property name="hdfsSiteConfigURLs" value="" /> <!-- was ${hdfs.site.config.urls} -->
<property name="localIvaratorCacheDirConfigs" ref="IvaratorCacheDirConfigs" /> <!-- was ${local.ivarator.cache.dir.config} -->
<property name="ivaratorCacheDirConfigs" ref="IvaratorCacheDirConfigs" /> <!-- was ${ivarator.cache.dir.config} -->
<property name="ivaratorCacheBufferSize" value="10000" />
<property name="ivaratorCacheScanPersistThreshold" value="100000" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@
<property name="hdfsSiteConfigURLs" value="${hdfs.site.config.urls}" />
<!-- the zookeeper configuration (could be something like file:///opt/datawave/zookeeper-warehouse/conf/zoo.cfg, or the zookeeper list (somehost1:2181,somehost2:2181,…)) -->
<property name="zookeeperConfig" value="${ivarator.zookeeper.hosts}" />
<!-- the list of directories in which the webserver puts its ivarator caches (selection always made on webserver side) -->
<property name="localIvaratorCacheDirConfigs" ref="LocalIvaratorCacheDirConfigs" />
<!-- the list of directories in which the ivarator puts its caches (selection always made on tserver side) -->
<property name="ivaratorCacheDirConfigs" ref="IvaratorCacheDirConfigs" />
<!-- the hdfs location where FST's will be stored (see maxOrExpansionFstThreshold above) -->
Expand Down Expand Up @@ -325,6 +327,10 @@
<property name="tfAggregationThresholdMs" value="-1" />
</bean>

<!-- List bean referenced by the localIvaratorCacheDirConfigs property (webserver-side ivarator caches). -->
<!-- The list body is the local.ivarator.cache.dir.config placeholder; presumably it is substituted with IvaratorCacheDirConfig bean definitions before Spring parses this file (verify against the build). -->
<util:list id="LocalIvaratorCacheDirConfigs">
${local.ivarator.cache.dir.config}
</util:list>

<!-- List bean referenced by the ivaratorCacheDirConfigs property (tserver-side ivarator caches). -->
<!-- The list body is the ivarator.cache.dir.config placeholder; presumably it is substituted with IvaratorCacheDirConfig bean definitions before Spring parses this file (verify against the build). -->
<util:list id="IvaratorCacheDirConfigs">
${ivarator.cache.dir.config}
</util:list>
Expand Down

0 comments on commit bb4426e

Please sign in to comment.