diff --git a/.classpath b/.classpath index b80f32b..50b494c 100644 --- a/.classpath +++ b/.classpath @@ -2,8 +2,8 @@ - - - + + + diff --git a/bin/lucene_read_file/BM25FSimilarity$BM25DocScorer.class b/bin/lucene_read_file/BM25FSimilarity$BM25DocScorer.class index a9b6e25..99aded0 100644 Binary files a/bin/lucene_read_file/BM25FSimilarity$BM25DocScorer.class and b/bin/lucene_read_file/BM25FSimilarity$BM25DocScorer.class differ diff --git a/bin/lucene_read_file/BM25FSimilarity$BM25Stats.class b/bin/lucene_read_file/BM25FSimilarity$BM25Stats.class index 26b82f0..22667e5 100644 Binary files a/bin/lucene_read_file/BM25FSimilarity$BM25Stats.class and b/bin/lucene_read_file/BM25FSimilarity$BM25Stats.class differ diff --git a/bin/lucene_read_file/BM25FSimilarity.class b/bin/lucene_read_file/BM25FSimilarity.class index 2500ab4..76a9a30 100644 Binary files a/bin/lucene_read_file/BM25FSimilarity.class and b/bin/lucene_read_file/BM25FSimilarity.class differ diff --git a/bin/lucene_read_file/LuceneReadIndexFromFileExample.class b/bin/lucene_read_file/LuceneReadIndexFromFileExample.class index a7b3958..20b93fe 100644 Binary files a/bin/lucene_read_file/LuceneReadIndexFromFileExample.class and b/bin/lucene_read_file/LuceneReadIndexFromFileExample.class differ diff --git a/bin/lucene_read_file/LuceneWriteIndexFromFileExample$1.class b/bin/lucene_read_file/LuceneWriteIndexFromFileExample$1.class index be2ed95..91dc190 100644 Binary files a/bin/lucene_read_file/LuceneWriteIndexFromFileExample$1.class and b/bin/lucene_read_file/LuceneWriteIndexFromFileExample$1.class differ diff --git a/bin/lucene_read_file/LuceneWriteIndexFromFileExample.class b/bin/lucene_read_file/LuceneWriteIndexFromFileExample.class index 41318dd..7a766ad 100644 Binary files a/bin/lucene_read_file/LuceneWriteIndexFromFileExample.class and b/bin/lucene_read_file/LuceneWriteIndexFromFileExample.class differ diff --git a/build.xml b/build.xml new file mode 100644 index 0000000..72a21fe --- /dev/null +++ b/build.xml @@ -0,0 +1,73 @@ + + + + + + + + + + + Builds, tests, and runs the project lucene_read_file. + + + diff --git a/build/built-jar.properties b/build/built-jar.properties new file mode 100644 index 0000000..2644363 --- /dev/null +++ b/build/built-jar.properties @@ -0,0 +1,4 @@ +#Sun, 09 Dec 2018 19:27:17 +0530 + + +E\:\\workspace3\\LuceneNewsSearch-Final= diff --git a/build/classes/.netbeans_automatic_build b/build/classes/.netbeans_automatic_build new file mode 100644 index 0000000..e69de29 diff --git a/build/classes/.netbeans_update_resources b/build/classes/.netbeans_update_resources new file mode 100644 index 0000000..e69de29 diff --git a/build/classes/lucene_read_file/BM25FSimilarity$BM25DocScorer.class b/build/classes/lucene_read_file/BM25FSimilarity$BM25DocScorer.class new file mode 100644 index 0000000..cb3f0a4 Binary files /dev/null and b/build/classes/lucene_read_file/BM25FSimilarity$BM25DocScorer.class differ diff --git a/build/classes/lucene_read_file/BM25FSimilarity$BM25Stats.class b/build/classes/lucene_read_file/BM25FSimilarity$BM25Stats.class new file mode 100644 index 0000000..13c97a6 Binary files /dev/null and b/build/classes/lucene_read_file/BM25FSimilarity$BM25Stats.class differ diff --git a/build/classes/lucene_read_file/BM25FSimilarity.class b/build/classes/lucene_read_file/BM25FSimilarity.class new file mode 100644 index 0000000..8bd52c7 Binary files /dev/null and b/build/classes/lucene_read_file/BM25FSimilarity.class differ diff --git a/build/classes/lucene_read_file/LuceneReadIndexFromExample_new.class b/build/classes/lucene_read_file/LuceneReadIndexFromExample_new.class new file mode 100644 index 0000000..27c159f Binary files /dev/null and b/build/classes/lucene_read_file/LuceneReadIndexFromExample_new.class differ diff --git a/build/classes/lucene_read_file/NewJFrame1$1.class b/build/classes/lucene_read_file/NewJFrame1$1.class new file mode 100644 index 0000000..00c17b9 Binary files /dev/null and b/build/classes/lucene_read_file/NewJFrame1$1.class differ diff --git a/build/classes/lucene_read_file/NewJFrame1$2.class b/build/classes/lucene_read_file/NewJFrame1$2.class new file mode 100644 index 0000000..521e90a Binary files /dev/null and b/build/classes/lucene_read_file/NewJFrame1$2.class differ diff --git a/build/classes/lucene_read_file/NewJFrame1$3.class b/build/classes/lucene_read_file/NewJFrame1$3.class new file mode 100644 index 0000000..1763d8e Binary files /dev/null and b/build/classes/lucene_read_file/NewJFrame1$3.class differ diff --git a/build/classes/lucene_read_file/NewJFrame1.class b/build/classes/lucene_read_file/NewJFrame1.class new file mode 100644 index 0000000..5bc21c6 Binary files /dev/null and b/build/classes/lucene_read_file/NewJFrame1.class differ diff --git a/build/classes/lucene_read_file/NewJFrame1.form b/build/classes/lucene_read_file/NewJFrame1.form new file mode 100644 index 0000000..a4e5cbc --- /dev/null +++ b/build/classes/lucene_read_file/NewJFrame1.form @@ -0,0 +1,106 @@ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/build/classes/lucene_read_file/new_index_writer$1.class b/build/classes/lucene_read_file/new_index_writer$1.class new file mode 100644 index 0000000..8924aea Binary files /dev/null and b/build/classes/lucene_read_file/new_index_writer$1.class differ diff --git a/build/classes/lucene_read_file/new_index_writer$2.class b/build/classes/lucene_read_file/new_index_writer$2.class new file mode 100644 index 0000000..d7e5c42 Binary files /dev/null and b/build/classes/lucene_read_file/new_index_writer$2.class differ diff --git a/build/classes/lucene_read_file/new_index_writer.class b/build/classes/lucene_read_file/new_index_writer.class new file mode 100644 index 0000000..3669dc8 Binary files /dev/null and b/build/classes/lucene_read_file/new_index_writer.class differ diff --git a/dist/README.TXT b/dist/README.TXT new file mode 100644 index 0000000..509572b --- /dev/null +++ b/dist/README.TXT @@ -0,0 +1,32 @@ +======================== +BUILD OUTPUT DESCRIPTION +======================== + +When you build an Java application project that has a main class, the IDE +automatically copies all of the JAR +files on the projects classpath to your projects dist/lib folder. The IDE +also adds each of the JAR files to the Class-Path element in the application +JAR files manifest file (MANIFEST.MF). + +To run the project from the command line, go to the dist folder and +type the following: + +java -jar "lucene_read_file.jar" + +To distribute this project, zip up the dist folder (including the lib folder) +and distribute the ZIP file. + +Notes: + +* If two JAR files on the project classpath have the same name, only the first +JAR file is copied to the lib folder. +* Only JAR files are copied to the lib folder. +If the classpath contains other types of files or folders, these files (folders) +are not copied. +* If a library on the projects classpath also has a Class-Path element +specified in the manifest,the content of the Class-Path element has to be on +the projects runtime path. +* To set a main class in a standard Java project, right-click the project node +in the Projects window and choose Properties. Then click Run and enter the +class name in the Main Class field. Alternatively, you can manually type the +class name in the manifest Main-Class element. diff --git a/dist/indexedFiles/write.lock b/dist/indexedFiles/write.lock new file mode 100644 index 0000000..e69de29 diff --git a/dist/lib/lucene-analyzers-common-6.2.0.jar b/dist/lib/lucene-analyzers-common-6.2.0.jar new file mode 100644 index 0000000..a4d4e69 Binary files /dev/null and b/dist/lib/lucene-analyzers-common-6.2.0.jar differ diff --git a/dist/lib/lucene-core-6.2.0.jar b/dist/lib/lucene-core-6.2.0.jar new file mode 100644 index 0000000..f60474d Binary files /dev/null and b/dist/lib/lucene-core-6.2.0.jar differ diff --git a/dist/lib/lucene-queryparser-6.2.0.jar b/dist/lib/lucene-queryparser-6.2.0.jar new file mode 100644 index 0000000..dff797e Binary files /dev/null and b/dist/lib/lucene-queryparser-6.2.0.jar differ diff --git a/indexedFiles/_x.cfe b/indexedFiles/_x.cfe new file mode 100644 index 0000000..868a1c8 Binary files /dev/null and b/indexedFiles/_x.cfe differ diff --git a/indexedFiles/_x.cfs b/indexedFiles/_x.cfs new file mode 100644 index 0000000..07f0fb3 Binary files /dev/null and b/indexedFiles/_x.cfs differ diff --git a/indexedFiles/_x.si b/indexedFiles/_x.si new file mode 100644 index 0000000..cb1e72a Binary files /dev/null and b/indexedFiles/_x.si differ diff --git a/indexedFiles/segments_w b/indexedFiles/segments_w new file mode 100644 index 0000000..f76a4a9 Binary files /dev/null and b/indexedFiles/segments_w differ diff --git a/nbproject/build-impl.xml b/nbproject/build-impl.xml new file mode 100644 index 0000000..3933ab7 --- /dev/null +++ b/nbproject/build-impl.xml @@ -0,0 +1,1403 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Must set src.dir + Must set build.dir + Must set dist.dir + Must set build.classes.dir + Must set dist.javadoc.dir + Must set build.test.classes.dir + Must set build.test.results.dir + Must set build.classes.excludes + Must set dist.jar + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Must set javac.includes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + No tests executed. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Must set JVM to use for profiling in profiler.info.jvm + Must set profiler agent JVM arguments in profiler.info.jvmargs.agent + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Must select some files in the IDE or set javac.includes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + To run this application from the command line without Ant, try: + + java -jar "${dist.jar.resolved}" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Must select one file in the IDE or set run.class + + + + Must select one file in the IDE or set run.class + + + + + + + + + + + + + + + + + + + + + + + Must select one file in the IDE or set debug.class + + + + + Must select one file in the IDE or set debug.class + + + + + Must set fix.includes + + + + + + + + + + This target only works when run from inside the NetBeans IDE. + + + + + + + + + Must select one file in the IDE or set profile.class + This target only works when run from inside the NetBeans IDE. + + + + + + + + + This target only works when run from inside the NetBeans IDE. + + + + + + + + + + + + + This target only works when run from inside the NetBeans IDE. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Must select one file in the IDE or set run.class + + + + + + Must select some files in the IDE or set test.includes + + + + + Must select one file in the IDE or set run.class + + + + + Must select one file in the IDE or set applet.url + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Must select some files in the IDE or set javac.includes + + + + + + + + + + + + + + + + + + Some tests failed; see details above. + + + + + + + + + Must select some files in the IDE or set test.includes + + + + Some tests failed; see details above. + + + + Must select some files in the IDE or set test.class + Must select some method in the IDE or set test.method + + + + Some tests failed; see details above. + + + + + Must select one file in the IDE or set test.class + + + + Must select one file in the IDE or set test.class + Must select some method in the IDE or set test.method + + + + + + + + + + + + + + Must select one file in the IDE or set applet.url + + + + + + + + + Must select one file in the IDE or set applet.url + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/nbproject/genfiles.properties b/nbproject/genfiles.properties new file mode 100644 index 0000000..11b1627 --- /dev/null +++ b/nbproject/genfiles.properties @@ -0,0 +1,8 @@ +build.xml.data.CRC32=35e2b44d +build.xml.script.CRC32=ca968c1d +build.xml.stylesheet.CRC32=8064a381@1.80.1.48 +# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml. +# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you. +nbproject/build-impl.xml.data.CRC32=35e2b44d +nbproject/build-impl.xml.script.CRC32=ae62c18d +nbproject/build-impl.xml.stylesheet.CRC32=830a3534@1.80.1.48 diff --git a/nbproject/private/config.properties b/nbproject/private/config.properties new file mode 100644 index 0000000..e69de29 diff --git a/nbproject/private/private.properties b/nbproject/private/private.properties new file mode 100644 index 0000000..fadbf98 --- /dev/null +++ b/nbproject/private/private.properties @@ -0,0 +1,6 @@ +compile.on.save=true +do.depend=false +do.jar=true +javac.debug=true +javadoc.preview=true +user.properties.file=C:\\Users\\Kaustubh\\AppData\\Roaming\\NetBeans\\8.2\\build.properties diff --git a/nbproject/private/private.xml b/nbproject/private/private.xml new file mode 100644 index 0000000..97ed477 --- /dev/null +++ b/nbproject/private/private.xml @@ -0,0 +1,12 @@ + + + + + + file:/E:/workspace3/LuceneNewsSearch-Final/src/lucene_read_file/new_index_writer.java + file:/E:/workspace3/LuceneNewsSearch-Final/src/lucene_read_file/NewJFrame1.java + file:/E:/workspace3/LuceneNewsSearch-Final/src/lucene_read_file/LuceneReadIndexFromExample_new.java + file:/E:/workspace3/LuceneNewsSearch-Final/src/lucene_read_file/BM25FSimilarity.java + + + diff --git a/nbproject/project.properties b/nbproject/project.properties new file mode 100644 index 0000000..79b3feb --- /dev/null +++ b/nbproject/project.properties @@ -0,0 +1,105 @@ +annotation.processing.enabled=true +annotation.processing.enabled.in.editor=false +annotation.processing.processors.list= +annotation.processing.run.all.processors=true +annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output +application.title=lucene_read_file +application.vendor=Kaustubh +auxiliary.org-netbeans-modules-projectimport-eclipse-core.key=src=src;file=C:/lucene/lucene-6.2.0/queryparser/lucene-queryparser-6.2.0.jar;file=C:/lucene/lucene-6.2.0/core/lucene-core-6.2.0.jar;file=C:/lucene/lucene-6.2.0/analysis/common/lucene-analyzers-common-6.2.0.jar;output=bin; +auxiliary.org-netbeans-modules-projectimport-eclipse-core.project=. +auxiliary.org-netbeans-modules-projectimport-eclipse-core.timestamp=1544344642000 +auxiliary.org-netbeans-modules-projectimport-eclipse-core.workspace=.. +auxiliary.org-netbeans-spi-editor-hints-projects.perProjectHintSettingsFile=nbproject/cfg_hints.xml +build.classes.dir=${build.dir}/classes +build.classes.excludes=**/*.java,**/*.form +# This directory is removed when the project is cleaned: +build.dir=build +build.generated.dir=${build.dir}/generated +build.generated.sources.dir=${build.dir}/generated-sources +# Only compile against the classpath explicitly listed here: +build.sysclasspath=ignore +build.test.classes.dir=${build.dir}/test/classes +build.test.results.dir=${build.dir}/test/results +# Uncomment to specify the preferred debugger connection transport: +#debug.transport=dt_socket +debug.classpath=\ + ${run.classpath} +debug.test.classpath=\ + ${run.test.classpath} +# Files in build.classes.dir which should be excluded from distribution jar +dist.archive.excludes= +# This directory is removed when the project is cleaned: +dist.dir=dist +dist.jar=${dist.dir}/lucene_read_file.jar +dist.javadoc.dir=${dist.dir}/javadoc +endorsed.classpath= +excludes= +file.reference.lucene-analyzers-common-6.2.0.jar=C:\\lucene\\lucene-6.2.0\\analysis\\common\\lucene-analyzers-common-6.2.0.jar +file.reference.lucene-core-6.2.0.jar=C:\\lucene\\lucene-6.2.0\\core\\lucene-core-6.2.0.jar +file.reference.lucene-queryparser-6.2.0.jar=C:\\lucene\\lucene-6.2.0\\queryparser\\lucene-queryparser-6.2.0.jar +file.reference.LuceneNewsSearch-Final-src=src +includes=** +jar.archive.disabled=${jnlp.enabled} +jar.compress=false +jar.index=${jnlp.enabled} +javac.classpath=\ + ${file.reference.lucene-queryparser-6.2.0.jar}:\ + ${file.reference.lucene-core-6.2.0.jar}:\ + ${file.reference.lucene-analyzers-common-6.2.0.jar} +# Space-separated list of extra javac options +javac.compilerargs= +javac.deprecation=false +javac.external.vm=true +javac.processorpath=\ + ${javac.classpath} +javac.source=1.8 +javac.target=1.8 +javac.test.classpath=\ + ${javac.classpath}:\ + ${build.classes.dir} +javac.test.processorpath=\ + ${javac.test.classpath} +javadoc.additionalparam= +javadoc.author=false +javadoc.encoding=${source.encoding} +javadoc.noindex=false +javadoc.nonavbar=false +javadoc.notree=false +javadoc.private=false +javadoc.splitindex=true +javadoc.use=true +javadoc.version=false +javadoc.windowtitle= +jnlp.codebase.type=no.codebase +jnlp.descriptor=application +jnlp.enabled=false +jnlp.mixed.code=default +jnlp.offline-allowed=false +jnlp.signed=false +jnlp.signing= +jnlp.signing.alias= +jnlp.signing.keystore= +main.class=lucene_read_file.NewJFrame1 +# Optional override of default Application-Library-Allowable-Codebase attribute identifying the locations where your signed RIA is expected to be found. +manifest.custom.application.library.allowable.codebase= +# Optional override of default Caller-Allowable-Codebase attribute identifying the domains from which JavaScript code can make calls to your RIA without security prompts. +manifest.custom.caller.allowable.codebase= +# Optional override of default Codebase manifest attribute, use to prevent RIAs from being repurposed +manifest.custom.codebase= +# Optional override of default Permissions manifest attribute (supported values: sandbox, all-permissions) +manifest.custom.permissions= +meta.inf.dir=${src.dir}/META-INF +mkdist.disabled=false +platform.active=default_platform +run.classpath=\ + ${javac.classpath}:\ + ${build.classes.dir} +# Space-separated list of JVM arguments used when running the project. +# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value. +# To set system properties for unit tests define test-sys-prop.name=value: +run.jvmargs= +run.test.classpath=\ + ${javac.test.classpath}:\ + ${build.test.classes.dir} +source.encoding=UTF-8 +src.dir=${file.reference.LuceneNewsSearch-Final-src} diff --git a/nbproject/project.xml b/nbproject/project.xml new file mode 100644 index 0000000..2efb08a --- /dev/null +++ b/nbproject/project.xml @@ -0,0 +1,13 @@ + + + org.netbeans.modules.java.j2seproject + + + lucene_read_file + + + + + + + diff --git a/src/lucene_read_file/BM25FSimilarity.java b/src/lucene_read_file/BM25FSimilarity.java index 29ec933..43f37d6 100644 --- a/src/lucene_read_file/BM25FSimilarity.java +++ b/src/lucene_read_file/BM25FSimilarity.java @@ -1,19 +1,4 @@ package lucene_read_file; -/*Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ import java.io.IOException; @@ -30,23 +15,12 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.SmallFloat; -/** - * BM25 Similarity. Introduced in Stephen E. Robertson, Steve Walker, - * Susan Jones, Micheline Hancock-Beaulieu, and Mike Gatford. Okapi at TREC-3. - * In Proceedings of the Third Text REtrieval Conference (TREC 1994). - * Gaithersburg, USA, November 1994. - */ + public class BM25FSimilarity extends Similarity { private final float k1; private final float b; - /** - * BM25 with the supplied parameter values. - * @param k1 Controls non-linear term frequency normalization (saturation). - * @param b Controls to what degree document length normalizes tf values. - * @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is - * not within the range {@code [0..1]} - */ + public BM25FSimilarity(float k1, float b) { if ((Float.isFinite(k1) == false) || (k1 < 0)) { throw new IllegalArgumentException("illegal k1 value: " + k1 + ", must be a non-negative finite value"); @@ -58,80 +32,68 @@ public BM25FSimilarity(float k1, float b) { this.b = b; } - /** BM25 with these default values: - * - */ + public BM25FSimilarity() { this(1.2f, 0.75f); } - /** Implemented as log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5)). */ + protected float idf(long docFreq, long docCount) { return (float) Math.log(1 + (docCount - docFreq + 0.5D)/(docFreq + 0.5D)); } - /** Implemented as 1 / (distance + 1). */ + + + + + protected float sloppyFreq(int distance) { return 1.0f / (distance + 1); } - /** The default implementation returns 1 */ + protected float scorePayload(int doc, int start, int end, BytesRef payload) { return 1; } - /** The default implementation computes the average as sumTotalTermFreq / docCount, - * or returns 1 if the index does not store sumTotalTermFreq: - * any field that omits frequency information). */ + + + + protected float avgFieldLength(CollectionStatistics collectionStats) { final long sumTotalTermFreq = collectionStats.sumTotalTermFreq(); if (sumTotalTermFreq <= 0) { - return 1f; // field does not exist, or stat is unsupported + return 1f; } else { final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); return (float) (sumTotalTermFreq / (double) docCount); } } - /** The default implementation encodes boost / sqrt(length) - * with {@link SmallFloat#floatToByte315(float)}. This is compatible with - * Lucene's default implementation. If you change this, then you should - * change {@link #decodeNormValue(byte)} to match. */ + protected byte encodeNormValue(float boost, int fieldLength) { return SmallFloat.floatToByte315(boost / (float) Math.sqrt(fieldLength)); } - /** The default implementation returns 1 / f2 - * where f is {@link SmallFloat#byte315ToFloat(byte)}. */ + protected float decodeNormValue(byte b) { return NORM_TABLE[b & 0xFF]; } - /** - * True if overlap tokens (tokens with a position of increment of zero) are - * discounted from the document's length. - */ + protected boolean discountOverlaps = true; - /** Sets whether overlap tokens (Tokens with 0 position increment) are - * ignored when computing norm. By default this is true, meaning overlap - * tokens do not count when computing norms. */ + public void setDiscountOverlaps(boolean v) { discountOverlaps = v; } - /** - * Returns true if overlap tokens are discounted from the document's length. - * @see #setDiscountOverlaps - */ + public boolean getDiscountOverlaps() { return discountOverlaps; } - /** Cache of decoded bytes. */ + private static final float[] NORM_TABLE = new float[256]; static { @@ -149,28 +111,7 @@ public final long computeNorm(FieldInvertState state) { return encodeNormValue(state.getBoost(), numTerms); } - /** - * Computes a score factor for a simple term and returns an explanation - * for that score factor. - * - *

- * The default implementation uses: - * - *

-     * idf(docFreq, docCount);
-     * 
- * - * Note that {@link CollectionStatistics#docCount()} is used instead of - * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()} because also - * {@link TermStatistics#docFreq()} is used, and when the latter - * is inaccurate, so is {@link CollectionStatistics#docCount()}, and in the same direction. - * In addition, {@link CollectionStatistics#docCount()} does not skew when fields are sparse. - * - * @param collectionStats collection-level statistics - * @param termStats term-level statistics for the term - * @return an Explain object that includes both an idf score factor - and an explanation for the term. - */ + public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { final long df = termStats.docFreq(); final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); @@ -178,19 +119,7 @@ public Explanation idfExplain(CollectionStatistics collectionStats, TermStatisti return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"); } - /** - * Computes a score factor for a phrase. - * - *

- * The default implementation sums the idf factor for - * each term in the phrase. - * - * @param collectionStats collection-level statistics - * @param termStats term-level statistics for the terms in the phrase - * @return an Explain object that includes both an idf - * score factor for the phrase and an explanation - * for each term. - */ + public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) { final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); float idf = 0.0f; @@ -334,18 +263,12 @@ public String toString() { return "BM25(k1=" + k1 + ",b=" + b + ")"; } - /** - * Returns the k1 parameter - * @see #BM25FSimilarity(float, float) - */ + public final float getK1() { return k1; } - /** - * Returns the b parameter - * @see #BM25FSimilarity(float, float) - */ + public final float getB() { return b; } diff --git a/src/lucene_read_file/LuceneReadIndexFromExample_new.java b/src/lucene_read_file/LuceneReadIndexFromExample_new.java index 0fe2c8e..da24947 100644 --- a/src/lucene_read_file/LuceneReadIndexFromExample_new.java +++ b/src/lucene_read_file/LuceneReadIndexFromExample_new.java @@ -6,6 +6,8 @@ import java.io.IOException; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; import java.util.regex.*; import org.apache.lucene.analysis.standard.StandardAnalyzer; @@ -36,121 +38,134 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.BytesRef; + import java.io.IOException; +import java.util.Arrays; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; public class LuceneReadIndexFromExample_new { //directory contains the lucene indexes private static final String INDEX_DIR = "indexedFiles"; - public static void main(String[] args) throws Exception + public static ArrayList pmain(String queryTerm) throws Exception { - //Create lucene searcher. It search over a single IndexReader. - IndexSearcher searcher = createSearcher(); - - //Search indexed contents using search term - String queryString= "Fret 31-12-2016 WASHINGTON"; - ArrayList extractedDates=getDates(queryString); - ArrayList processedDates=processDates(extractedDates); - ArrayList foundDateDocs=new ArrayList(); -// for(int i=0;i res = new ArrayList(); + FSDirectory dir = FSDirectory.open(Paths.get(INDEX_DIR)); + IndexReader reader = DirectoryReader.open(dir); +// test(reader); + IndexSearcher searcher = new IndexSearcher(reader); + //String queryTerm="Trump Winter 2016-12-31"; + if(queryTerm.compareTo("")==0){ + System.exit(0); + } + + System.out.println(queryTerm); +// queryTerm=soundexSentence(queryTerm); + Map boost = new HashMap(); + boost.put("date",0.5f); + boost.put("title",0.25f); + boost.put("body",0.25f); + + + + MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[] {"date","title","body"}, new StandardAnalyzer(),boost); +// MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[] {"description","name","year","location","developer"}, new StandardAnalyzer(),boost); + MultiFieldQueryParser qp2 = new MultiFieldQueryParser(new String[] {"date","title","body"},new WhitespaceAnalyzer(),boost); + + Query query = qp.parse(queryTerm); + System.out.println(query); + ArrayList dates_query= getDates(queryTerm); +// System.out.println("OK"); +// System.out.println(dates_query.size()); + float[] myarr=new float[101]; + if(!dates_query.isEmpty()){ + ArrayList processed_dates= processDates(dates_query); + String queryTerm2=""; + for(int i=0;i arr=new ArrayList(); -// //Merging to be done -// //System.out.println("Date Match:"); -//// for(int i=0;i getDates(String queryString){ @@ -233,6 +248,7 @@ private static IndexSearcher createSearcher() throws IOException //Index searcher IndexSearcher searcher = new IndexSearcher(reader); + searcher.setSimilarity(new BM25FSimilarity()); return searcher; } } diff --git a/src/lucene_read_file/LuceneReadIndexFromFileExample.java b/src/lucene_read_file/LuceneReadIndexFromFileExample.java index 3f3a6e8..085cbd3 100644 --- a/src/lucene_read_file/LuceneReadIndexFromFileExample.java +++ b/src/lucene_read_file/LuceneReadIndexFromFileExample.java @@ -1,26 +1,25 @@ package lucene_read_file; -import java.awt.List; - + import java.io.IOException; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.regex.*; +import java.util.Arrays; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.ClassicSimilarity; +import org.apache.lucene.search.similarities.TFIDFSimilarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.search.PhraseQuery; public class LuceneReadIndexFromFileExample { @@ -33,157 +32,38 @@ public static void main(String[] args) throws Exception IndexSearcher searcher = createSearcher(); //Search indexed contents using search term - String queryString= "Winter 2016-12-31 WASHINGTON"; - ArrayList extractedDates=getDates(queryString); - ArrayList processedDates=processDates(extractedDates); - ArrayList foundDateDocs=new ArrayList(); - for(int i=0;i arr=new ArrayList(); - //Merging to be done - //System.out.println("Date Match:"); -// for(int i=0;i getDates(String queryString){ - Matcher m = Pattern.compile("(\\d{4}/\\d{2}/\\d{2}|\\d{2}/\\d{2}/\\d{4}|\\d{2}-\\d{2}-\\d{4}|\\d{4}-\\d{2}-\\d{2})", - Pattern.CASE_INSENSITIVE).matcher(queryString); - ArrayList mydates=new ArrayList(); - while (m.find()) { - //System.out.println(m.group(1)); - mydates.add(m.group(1)); - } - return mydates; - } - - private static String convertDate(String date){ - String returnDate=date; - if(date.charAt(2)=='/'){ - String newDate[]=date.split("/"); - returnDate=newDate[2]+"-"+newDate[1]+"-"+newDate[0]; - } - else if(date.charAt(2)=='-'){ - String newDate[]=date.split("-"); - returnDate=newDate[2]+"-"+newDate[1]+"-"+newDate[0]; - } - else if(date.charAt(4)=='/'){ - returnDate=date.replace('/', '-'); - } - return returnDate; - } - - private static ArrayList processDates(ArrayList extractedDates){ - ArrayList processedDates=new ArrayList(); - for(int i=0;iterm and then adding the new //document. The delete and then add are atomic as seen @@ -138,4 +113,4 @@ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IO writer.updateDocument(new Term("path", file.toString()), doc); } } -} +} \ No newline at end of file diff --git a/src/lucene_read_file/NewJFrame1.form b/src/lucene_read_file/NewJFrame1.form new file mode 100644 index 0000000..a4e5cbc --- /dev/null +++ b/src/lucene_read_file/NewJFrame1.form @@ -0,0 +1,106 @@ + + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/src/lucene_read_file/NewJFrame1.java b/src/lucene_read_file/NewJFrame1.java new file mode 100644 index 0000000..252d194 --- /dev/null +++ b/src/lucene_read_file/NewJFrame1.java @@ -0,0 +1,169 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package lucene_read_file; + +import java.util.ArrayList; +import java.util.logging.Level; +import java.util.logging.Logger; +/** + * + * @author Kaustubh + */ +public class NewJFrame1 extends javax.swing.JFrame { + + /** + * Creates new form NewJFrame1 + */ + public NewJFrame1() { + initComponents(); + jButton1.setEnabled(false); + } + + /** + * This method is called from within the constructor to initialize the form. + * WARNING: Do NOT modify this code. The content of this method is always + * regenerated by the Form Editor. + */ + @SuppressWarnings("unchecked") + // //GEN-BEGIN:initComponents + private void initComponents() { + + jLabel1 = new javax.swing.JLabel(); + jTextField1 = new javax.swing.JTextField(); + jButton1 = new javax.swing.JButton(); + jScrollPane1 = new javax.swing.JScrollPane(); + jTextArea1 = new javax.swing.JTextArea(); + jButton2 = new javax.swing.JButton(); + + setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE); + + jLabel1.setText("Query"); + + jButton1.setText("Search"); + jButton1.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jButton1ActionPerformed(evt); + } + }); + + jTextArea1.setColumns(20); + jTextArea1.setRows(5); + jScrollPane1.setViewportView(jTextArea1); + + jButton2.setText("Write Index"); + jButton2.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jButton2ActionPerformed(evt); + } + }); + + javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane()); + getContentPane().setLayout(layout); + layout.setHorizontalGroup( + layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) + .addGroup(layout.createSequentialGroup() + .addComponent(jScrollPane1) + .addContainerGap()) + .addGroup(layout.createSequentialGroup() + .addGap(111, 111, 111) + .addComponent(jButton1) + .addGap(86, 86, 86) + .addComponent(jButton2) + .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)) + .addGroup(layout.createSequentialGroup() + .addGap(28, 28, 28) + .addComponent(jLabel1, javax.swing.GroupLayout.PREFERRED_SIZE, 57, javax.swing.GroupLayout.PREFERRED_SIZE) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED, 60, Short.MAX_VALUE) + .addComponent(jTextField1, javax.swing.GroupLayout.PREFERRED_SIZE, 291, javax.swing.GroupLayout.PREFERRED_SIZE) + .addGap(61, 61, 61)) + ); + layout.setVerticalGroup( + layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) + .addGroup(layout.createSequentialGroup() + .addGap(32, 32, 32) + .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false) + .addComponent(jLabel1, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) + .addComponent(jTextField1, javax.swing.GroupLayout.DEFAULT_SIZE, 38, Short.MAX_VALUE)) + .addGap(18, 18, 18) + .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) + .addComponent(jButton1) + .addComponent(jButton2)) + .addGap(48, 48, 48) + .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 256, Short.MAX_VALUE) + .addContainerGap()) + ); + + pack(); + }// //GEN-END:initComponents + + private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed + // TODO add your handling code here: + LuceneReadIndexFromExample_new e =new LuceneReadIndexFromExample_new(); + String query = jTextField1.getText(); + jTextArea1.setText(" "); + try { + ArrayList res=e.pmain(query); + for(int i=0;i + /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel. + * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html + */ + try { + for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) { + if ("Nimbus".equals(info.getName())) { + javax.swing.UIManager.setLookAndFeel(info.getClassName()); + break; + } + } + } catch (ClassNotFoundException ex) { + java.util.logging.Logger.getLogger(NewJFrame1.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); + } catch (InstantiationException ex) { + java.util.logging.Logger.getLogger(NewJFrame1.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); + } catch (IllegalAccessException ex) { + java.util.logging.Logger.getLogger(NewJFrame1.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); + } catch (javax.swing.UnsupportedLookAndFeelException ex) { + java.util.logging.Logger.getLogger(NewJFrame1.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); + } + // + + /* Create and display the form */ + java.awt.EventQueue.invokeLater(new Runnable() { + public void run() { + new NewJFrame1().setVisible(true); + + } + }); + } + + // Variables declaration - do not modify//GEN-BEGIN:variables + private javax.swing.JButton jButton1; + private javax.swing.JButton jButton2; + private javax.swing.JLabel jLabel1; + private javax.swing.JScrollPane jScrollPane1; + private javax.swing.JTextArea jTextArea1; + private javax.swing.JTextField jTextField1; + // End of variables declaration//GEN-END:variables +} diff --git a/src/lucene_read_file/new_index_writer.java b/src/lucene_read_file/new_index_writer.java index f68e606..d44e59f 100644 --- a/src/lucene_read_file/new_index_writer.java +++ b/src/lucene_read_file/new_index_writer.java @@ -52,19 +52,19 @@ public Similarity get(String name) { // return new BM25FSimilarity(/*k1*/1.4f, /*b*/0.9f); // } if (name.equals("body")) { - return new BM25FSimilarity(/*k1*/1.2f, /*b*/0.8f); + return new BM25FSimilarity(/*k1*/0.6f, /*b*/0.75f); } else if (name.equals("title")) { - return new BM25FSimilarity(/*k1*/1.2f, /*b*/0.8f); + return new BM25FSimilarity(/*k1*/0.6f, /*b*/0.75f); } else if (name.equals("date")) { - return new BM25FSimilarity(/*k1*/1.4f, /*b*/0.9f); + return new BM25FSimilarity(/*k1*/1.4f, /*b*/0.8f); } return new BM25FSimilarity(); } }; - public static void main(String[] args) + public static void smain() { //Input folder String docsPath = "myfiles"; @@ -86,7 +86,7 @@ public static void main(String[] args) //IndexWriter Configuration IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setSimilarity(perFieldSimilarities); - iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); +// iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); //IndexWriter writes new index files to the directory IndexWriter writer = new IndexWriter(dir, iwc);