[BUG] #272

@ranjiniganeshan

Description

Describe the bug

Why does the logstash-output-opensearch plugin write duplicate events to OpenSearch even after fingerprint evaluation?

error: [2025-03-16T00:07:37,943][INFO ][logstash.outputs.opensearch][main][f636cb73983bcc650332a7ed143a2c1655e1be6017f7947da7fc02b194adad2d] Retrying failed action {:status=>429, :action=>["index", {:_id=>nil, :_index=>"http_server-2025.03.14", :routing=>nil}, {"log"=>{"file"=>{"path"=>"C:/Program Files (x86)/Server/logs/localhost_access_log.log"}}, "event"=>{"hash"=>"c60225b5468e886bff692d09664d5e686b269476", "original"=>"[14/Mar/2025:11:26:10 +0000] 0:0:0:0:0:0:0:1 GET /remote/core.list-plugins HTTP/1.1 7354 200 [http-nio-8080-exec-1] [F2FC4D635FAE927072C1A24B03BD5F55.route1] admin 109ms\r"}, "url"=>{"uripath"=>"/remote/core.list-plugins"}, "@version"=>"1", "source"=>{"address"=>"0:0:0:0:0:0:0:1"}, "nodeRole"=>"http_server", "process"=>{"name"=>"http-nio-8080-exec-1"}, "http"=>{"user"=>"admin", "version"=>"1.1", "referrer"=>"F2FC4D635FAE927072C1A24B03BD5F55.route1", "response"=>{"body"=>{"bytes"=>7354}, "status_code"=>200, "time"=>109}, "request"=>{"method"=>"GET"}}, "message"=>"[14/Mar/2025:11:26:10 +0000] 0:0:0:0:0:0:0:1 GET /remote/core.list-plugins HTTP/1.1 7354 200 [http-nio-8080-exec-1] [F2FC4D635FAE927072C1A24B03BD5F55.route1] admin 109ms\r", "podName"=>"ntt", "type"=>"localhost_access_log", "host"=>{"name"=>"http_sever"}, "partition"=>"ntt0", "@timestamp"=>2025-03-14T11:26:10.000Z, "pr_name"=>"server-po"}], :error=>{"type"=>"cluster_block_exception", "reason"=>"index [http_server-2025.03.14] blocked by: [TOO_MANY_REQUESTS/12/disk usage exceeded flood-stage watermark, index has read-only-allow-delete block];"}}

To Reproduce
Steps to reproduce the behavior:

  1. Ingest log events from a file that is rotated daily.
  2. Logstash re-sends log events that are already in OpenSearch (screenshot attached).
  3. An ISM policy moves indices after 2 days.
  4. The index timestamp is derived from the log message.

Expected behavior
Logstash should not write duplicate events when file rotation happens. The duplicates make writes to the index fail once it becomes read-only after 2 days (as configured in the ISM policy).
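
A possible mitigation (editor's sketch, not part of the original report): drop events older than the ISM window before the output stage, so Logstash never retries writes against a daily index that has already gone read-only. The 172800-second threshold is an assumption matching the 2-day ISM policy, and the filter must run after the date filter so @timestamp reflects the event time:

filter {
  ruby {
    # Cancel events older than 2 days (172800 s); their target daily index
    # may already be read-only under the ISM policy.
    code => "event.cancel if (Time.now.to_f - event.get('@timestamp').to_f) > 172800"
  }
}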

Plugins
logstash.outputs.opensearch

Screenshots
attached

Host/Environment (please complete the following information):

  • OS: Windows
  • Version: 8.17.3

Additional context
Complete details of the Logstash config follow.

input {
  file {
    path => "C:/Program Files (x86)/Server/logs/localhost_access_log.log*"
    type => "localhost_access_log"
    start_position => "beginning"
    sincedb_path => "C:/setup/tools/.sincedb_localhost"
    ignore_older => 172800   # ignore files older than 48 hours
    close_older => 86400     # free the resources
    exclude => [
      "C:/Program Files (x86)/Server/logs/localhost_access_log.log.[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9].log"  # Exclude rotated files with YYYY-MM-DD pattern
    ]
  }
  file {
    path => "C:/Program Files (x86)/Server/logs/orlog.log*"
    type => "orlog"
    codec => multiline {
      pattern => "^%{TIMESTAMP_ISO8601}"
      negate => true
      what => "previous"
      charset => "ISO-8859-1"
    }
    start_position => "beginning"
    ignore_older => 86400   # ignore files older than 24 hours
    close_older => 86400  # free the resources  
  }
}
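
One detail worth flagging (an observation, not from the report): the orlog input sets no sincedb_path, so it falls back to the default sincedb location; if that file is ever lost, start_position => "beginning" re-reads entire files. A sketch that pins it, using a hypothetical path mirroring the access-log input:

input {
  file {
    path => "C:/Program Files (x86)/Server/logs/orlog.log*"
    type => "orlog"
    start_position => "beginning"
    sincedb_path => "C:/setup/tools/.sincedb_orlog"  # hypothetical path, mirrors the access-log input
  }
}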

filter {
  # Process logs of type localhost_access_log
  if [type] == "localhost_access_log" {
    grok {
      match => {
        "message" => [
          # Pattern 1: matches structured HTTP access-log lines
          "\[%{HAPROXYDATE:[@metadata][timestamp]}\] %{IPORHOST:[source][address]} %{WORD:[http][request][method]} %{URIPATH:[url][uripath]} HTTP/%{NUMBER:[http][version]} (?:-|%{INT:[http][response][body][bytes]:int}) %{INT:[http][response][status_code]:int} \[%{DATA:[process][name]}\] \[(?:-|%{DATA:[http][referrer]})\] (?:-|%{WORD:[http][user]}) %{NUMBER:[http][response][time]:int}ms",
          # Pattern 2: fallback for lines with just a timestamp and a message
          "^\[%{HTTPDATE:[@metadata][timestamp]}\] %{GREEDYDATA:log_message}"
        ]
      }
    }

    # Convert extracted `[@metadata][timestamp]` to `@timestamp`
    date {
      match => [ "[@metadata][timestamp]", "dd/MMM/yyyy:HH:mm:ss Z" ]
      timezone => "UTC"
      target => "@timestamp"
      tag_on_failure => ["_dateparsefailure"]
    }

    # Debugging: Log any date parsing failures
    if "_dateparsefailure" in [tags] {
      mutate {
        add_field => { "failed_timestamp" => "%{[@metadata][timestamp]}" }
      }
    }

    # Handle Grok failures gracefully
    if "_grokparsefailure" in [tags] {
      mutate {
        add_field => { "log_message_raw" => "%{message}" }
        remove_tag => ["_grokparsefailure"]
      }
    }
  }

  # Process logs of type orlog
  if [type] == "orlog" {
    grok {
      match => {
        "message" => [ "%{TIMESTAMP_ISO8601:[@metadata][timestamp]} %{LOGLEVEL:[log][level]}%{SPACE}%{GREEDYDATA:msg}" ]
      }
    }
    date {
      match => ["[@metadata][timestamp]", "ISO8601"]
      timezone => "UTC" 
      target => "@timestamp"
    }
  }
}


filter {
  fingerprint {
    method => "SHA1"
    key => "103013"
    target => "fingerprint"
    source => ["[@metadata][timestamp]", "message"]
    concatenate_sources => true
  }
}
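
Since [@metadata][timestamp] is itself extracted from message, hashing message alone would give the same per-event stability; a minimal variant sketch (editor's suggestion, keeping the original key):

filter {
  fingerprint {
    method => "SHA1"
    key => "103013"
    source => ["message"]   # message already embeds the timestamp, so this stays stable per event
    target => "fingerprint"
  }
}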

output {
  opensearch {
    ecs_compatibility => disabled
    hosts => ["https://***************************:443"]
    ssl => true
    document_id => "%{fingerprint}"
    index => "****************-%{+YYYY.MM.dd}"
  }
}
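
One way to surface duplicates instead of retrying them (a sketch, assuming the plugin's documented action option): with action => "create", a re-sent event whose fingerprint _id already exists in the target index is rejected with a 409 conflict rather than re-indexed. The endpoint and index prefix below are placeholders:

output {
  opensearch {
    ecs_compatibility => disabled
    hosts => ["https://<endpoint>:443"]          # placeholder endpoint
    ssl => true
    action => "create"                           # existing _ids get a 409 instead of being re-indexed
    document_id => "%{fingerprint}"
    index => "<index-prefix>-%{+YYYY.MM.dd}"     # placeholder index prefix
  }
}

Note this does not address the TOO_MANY_REQUESTS block itself, which the log attributes to the disk flood-stage watermark, not to deduplication.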

Files on disk:
C:/Program Files (x86)/Server/logs/localhost_access_log.log
C:/Program Files (x86)/Server/logs/localhost_access_log.2025-03-16
C:/Program Files (x86)/Server/logs/localhost_access_log.2025-03-15
C:/Program Files (x86)/Server/logs/localhost_access_log.2025-03-14
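
An observation on the listing above (editor's note): the file input's exclude option matches against the filename, not the full path, and the rotated files end in a bare date with no trailing ".log", so the exclude pattern in the config above never matches them. A sketch of a filename-only pattern shaped like the actual rotated names:

input {
  file {
    path => "C:/Program Files (x86)/Server/logs/localhost_access_log*"
    # exclude matches the filename only; rotated files look like
    # localhost_access_log.2025-03-14 (no ".log" suffix)
    exclude => [ "localhost_access_log.[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" ]
  }
}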

The content it is trying to write comes from the file C:/Program Files (x86)/Server/logs/localhost_access_log.2025-03-14:
[14/Mar/2025:11:26:10 +0000] 0:0:0:0:0:0:0:1 GET /remote/core.list-plugins HTTP/1.1 7354 200 [http-nio-8080-exec-1] [F2FC4D635FAE927072C1A24B03BD5F55.route1] admin 109ms

This event is already available in OpenSearch (screenshot attached).

I have an ISM policy: after two days the index rolls over to warm (read-only indices).
I have fingerprinting implemented (this is where the duplication happens); the fingerprint hash calculated for the duplicated event is the same.
