
/* Copyright 2002-2023 CS GROUP
 * Licensed to CS GROUP (CS) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * CS licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.orekit.frames;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.SortedSet;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.orekit.data.DataProvidersManager;
import org.orekit.errors.OrekitException;
import org.orekit.errors.OrekitMessages;
import org.orekit.time.AbsoluteDate;
import org.orekit.time.DateComponents;
import org.orekit.time.TimeComponents;
import org.orekit.time.TimeScale;
import org.orekit.utils.Constants;
import org.orekit.utils.IERSConventions;
import org.orekit.utils.IERSConventions.NutationCorrectionConverter;

/** Loader for EOP C04 files.
 * <p>EOP C04 files contain {@link EOPEntry
 * Earth Orientation Parameters} consistent with ITRF20xx for one year periods, with various
 * xx (05, 08, 14, 20) depending on the data source.</p>
 * <p>The EOP C04 files retrieved from the old ftp site
 * <a href="ftp://ftp.iers.org/products/eop/long-term/">ftp://ftp.iers.org/products/eop/long-term/</a>
 * were recognized thanks to their base names, which must match one of the patterns
 * {@code eopc04_##_IAU2000.##} or {@code eopc04_##.##} (or the same ending with <code>.gz</code> for
 * gzip-compressed files) where # stands for a digit character. As of early 2023, this ftp site
 * seems not to be accessible anymore.</p>
 * <p>
 * The official source for these files is now the web site
 * <a href="https://hpiers.obspm.fr/eoppc/eop/">https://hpiers.obspm.fr/eoppc/eop/</a>. These
 * files do <em>not</em> follow the old naming convention that was used in the older ftp site.
 * They lack the _05, _08 or _14 markers in the file names. The ITRF year appears only in the URL
 * (with directories eopc04_05, eop04_c08…). The directory for the current data is named eopc04
 * without any suffix. So before 2023-02-14 the eopc04 directory would contain files compatible with
 * ITRF2014 and after 2023-02-14 it would contain files compatible with ITRF2020. In each directory,
 * the files don't have any marker, hence users downloading eopc04.99 file from eopc04_05 would get
 * a file compatible with ITRF2005 whereas users downloading a file with the exact same name eopc04.99
 * but from eop04_c08 would get a file compatible with ITRF2008.
 * </p>
 * <p>
 * Starting with Orekit version 12.0, the ITRF year is retrieved by analyzing the file header, it is
 * not linked to file name anymore, hence it is compatible with any IERS site layout.
 * </p>
 * <p>
 * This class is immutable and hence thread-safe
 * </p>
 * @author Luc Maisonobe
class EopC04FilesLoader extends AbstractEopLoader implements EopHistoryLoader {

    /** Build a loader for IERS EOP C04 files.
     * @param supportedNames regular expression for supported files names
     * @param manager provides access to the EOP C04 files.
     * @param utcSupplier UTC time scale.
    EopC04FilesLoader(final String supportedNames,
                      final DataProvidersManager manager,
                      final Supplier<TimeScale> utcSupplier) {
        super(supportedNames, manager, utcSupplier);

    /** {@inheritDoc} */
    public void fillHistory(final IERSConventions.NutationCorrectionConverter converter,
                            final SortedSet<EOPEntry> history) {
        final Parser parser = new Parser(converter, getUtc());
        final EopParserLoader loader = new EopParserLoader(parser);

    /** Internal class performing the parsing. */
    static class Parser extends AbstractEopParser {

        /** Simple constructor.
         * @param converter converter to use
         * @param utc       time scale for parsing dates.
        Parser(final NutationCorrectionConverter converter,
               final TimeScale utc) {
            super(converter, null, utc);

        /** {@inheritDoc} */
        public Collection<EOPEntry> parse(final InputStream input, final String name)
            throws IOException, OrekitException {

            final List<EOPEntry> history = new ArrayList<>();

            // set up a reader for line-oriented EOP C04 files
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8))) {
                // reset parse info to start new file (do not clear history!)
                int lineNumber   = 0;
                boolean inHeader = true;
                final LineParser[] tentativeParsers = new LineParser[] {
                    new LineWithoutRatesParser(name),
                    new LineWithRatesParser(name)
                LineParser selectedParser = null;

                // read all file
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    boolean parsed = false;

                    if (inHeader) {
                        // maybe it's an header line
                        for (final LineParser parser : tentativeParsers) {
                            if (parser.parseHeaderLine(line)) {
                                // we recognized one EOP C04 format
                                selectedParser = parser;

                    if (selectedParser != null) {
                        // maybe it's a data line
                        final EOPEntry entry = selectedParser.parseDataLine(line);
                        if (entry != null) {

                            // this is a data line, build an entry from the extracted fields
                            parsed = true;

                            // we know we have already finished header
                            inHeader = false;


                    if (!(inHeader || parsed)) {
                        throw new OrekitException(OrekitMessages.UNABLE_TO_PARSE_LINE_IN_FILE,
                                lineNumber, name, line);

                // check if we have read something
                if (inHeader) {
                    throw new OrekitException(OrekitMessages.NOT_A_SUPPORTED_IERS_DATA_FILE, name);

            return history;

        /** Base parser for EOP C04 lines.
         * @since 12.0
        private abstract class LineParser {

            /** Pattern for ITRF version. */
            private final Pattern itrfVersionPattern;

            /** Pattern for columns header. */
            private final Pattern columnHeaderPattern;

            /** Pattern for data lines. */
            private final Pattern dataPattern;

            /** Year group. */
            private final int yearGroup;

            /** Month group. */
            private final int monthGroup;

            /** Day group. */
            private final int dayGroup;

            /** MJD group. */
            private final int mjdGroup;

            /** Name of the stream for error messages. */
            private final String name;

            /** ITRF version. */
            private ITRFVersion itrfVersion;

            /** Simple constructor.
             * @param itrfVersionRegexp regular expression for ITRF version
             * @param columnsHeaderRegexp regular expression for columns header
             * @param dataRegexp regular expression for data lines
             * @param yearGroup year group
             * @param monthGroup month group
             * @param dayGroup day group
             * @param mjdGroup MJD group
             * @param name  of the stream for error messages.
            protected LineParser(final String itrfVersionRegexp, final String columnsHeaderRegexp,
                                 final String dataRegexp,
                                 final int yearGroup, final int monthGroup, final int dayGroup,
                                 final int mjdGroup, final String name) {
                this.itrfVersionPattern  = Pattern.compile(itrfVersionRegexp);
                this.columnHeaderPattern = Pattern.compile(columnsHeaderRegexp);
                this.dataPattern         = Pattern.compile(dataRegexp);
                this.yearGroup           = yearGroup;
                this.monthGroup          = monthGroup;
                this.dayGroup            = dayGroup;
                this.mjdGroup            = mjdGroup;
                this.name                = name;

            /** Get the ITRF version for this EOP C04 file.
             * @return ITRF version
            protected ITRFVersion getItrfVersion() {
                return itrfVersion;

            /** Parse a header line.
             * @param line line to parse
             * @return true if line was recognized (either ITRF version or columns header)
            public boolean parseHeaderLine(final String line) {
                final Matcher itrfVersionMatcher = itrfVersionPattern.matcher(line);
                if (itrfVersionMatcher.matches()) {
                    switch (Integer.parseInt(itrfVersionMatcher.group(1))) {
                        case 5 :
                            itrfVersion = ITRFVersion.ITRF_2005;
                        case 8 :
                            itrfVersion = ITRFVersion.ITRF_2008;
                        case 14 :
                            itrfVersion = ITRFVersion.ITRF_2014;
                        case 20 :
                            itrfVersion = ITRFVersion.ITRF_2020;
                        default :
                            throw new OrekitException(OrekitMessages.NO_SUCH_ITRF_FRAME, itrfVersionMatcher.group(1));
                    return true;
                } else {
                    final Matcher columnHeaderMatcher = columnHeaderPattern.matcher(line);
                    if (columnHeaderMatcher.matches()) {
                        return true;
                    return false;

            /** Parse a data line.
             * @param line line to parse
             * @return EOP entry for the line, or null if line does not match expected regular expression
            public EOPEntry parseDataLine(final String line) {

                final Matcher matcher = dataPattern.matcher(line);
                if (!matcher.matches()) {
                    // this is not a data line
                    return null;

                // check date
                final DateComponents dc = new DateComponents(Integer.parseInt(matcher.group(yearGroup)),
                final int    mjd   = Integer.parseInt(matcher.group(mjdGroup));
                if (dc.getMJD() != mjd) {
                    throw new OrekitException(OrekitMessages.INCONSISTENT_DATES_IN_IERS_FILE,
                                              name, dc.getYear(), dc.getMonth(), dc.getDay(), mjd);

                return parseDataLine(matcher, dc);


            /** Parse a columns header line.
             * @param matcher matcher for line
            protected abstract void parseColumnsHeaderLine(Matcher matcher);

            /** Parse a data line.
             * @param matcher matcher for line
             * @param dc date components already extracted from the line
             * @return EOP entry for the line
            protected abstract EOPEntry parseDataLine(Matcher matcher, DateComponents dc);


        /** Parser for data lines without pole rates.
         * <p>
         * ITRF markers have either the following form:
         * </p>
         * <pre>
         *                           EOP (IERS) 05 C04
         * </pre>
         * <p>
         * or the following form:
         * </p>
         * <pre>
         *                           EOP (IERS) 14 C04 TIME SERIES
         * </pre>
         * <p>
         * Header have either the following form:
         * </p>
         * <pre>
         *       Date      MJD      x          y        UT1-UTC       LOD         dPsi      dEps       x Err     y Err   UT1-UTC Err  LOD Err    dPsi Err   dEpsilon Err
         *                          "          "           s           s            "         "        "          "          s           s            "         "
         *      (0h UTC)
         * </pre>
         * <p>
         * or the following form:
         * </p>
         * <pre>
         *       Date      MJD      x          y        UT1-UTC       LOD         dX        dY        x Err     y Err   UT1-UTC Err  LOD Err     dX Err       dY Err
         *                          "          "           s           s          "         "           "          "          s         s            "           "
         *      (0h UTC)
         * </pre>
         * <p>
         * The data lines in the EOP C04 yearly data files have either the following fixed form:
         * </p>
         * <pre>
         * year month day MJD …12 floating values fields in decimal format...
         * 2000   1   1  51544   0.043242   0.377915   0.3554777   …
         * 2000   1   2  51545   0.043515   0.377753   0.3546065   …
         * 2000   1   3  51546   0.043623   0.377452   0.3538444   …
         * </pre>
         * @since 12.0
        private class LineWithoutRatesParser extends LineParser {

            /** Nutation header group. */
            private static final int NUTATION_HEADER_GROUP = 1;

            /** Year group. */
            private static final int YEAR_GROUP = 1;

            /** Month group. */
            private static final int MONTH_GROUP = 2;

            /** Day group. */
            private static final int DAY_GROUP = 3;

            /** MJD group. */
            private static final int MJD_GROUP = 4;

            /** X component of pole motion group. */
            private static final int POLE_X_GROUP = 5;

            /** Y component of pole motion group. */
            private static final int POLE_Y_GROUP = 6;

            /** UT1-UTC group. */
            private static final int UT1_UTC_GROUP = 7;

            /** LoD group. */
            private static final int LOD_GROUP = 8;

            /** Correction for nutation first field (either dX or dPsi). */
            private static final int NUT_0_GROUP = 9;

            /** Correction for nutation second field (either dY or dEps). */
            private static final int NUT_1_GROUP = 10;

            /** Indicator for non-rotating origin. */
            private boolean isNonRotatingOrigin;

            /** Simple constructor.
             * @param name  of the stream for error messages.
            LineWithoutRatesParser(final String name) {
                super("^ +EOP +\\(IERS\\) +([0-9][0-9]) +C04.*",
                      "^ *Date +MJD +x +y +UT1-UTC +LOD +((?:dPsi +dEps)|(?:dX +dY)) .*",
                      "^(\\d+) +(\\d+) +(\\d+) +(\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+)(?: +(-?\\d+\\.\\d+)){6}$",

            /** {@inheritDoc} */
            protected void parseColumnsHeaderLine(final Matcher matcher) {
                isNonRotatingOrigin = matcher.group(NUTATION_HEADER_GROUP).startsWith("dX");

            /** {@inheritDoc} */
            protected EOPEntry parseDataLine(final Matcher matcher, final DateComponents dc) {

                final AbsoluteDate date = new AbsoluteDate(dc, getUtc());

                final double x     = Double.parseDouble(matcher.group(POLE_X_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS;
                final double y     = Double.parseDouble(matcher.group(POLE_Y_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS;
                final double dtu1  = Double.parseDouble(matcher.group(UT1_UTC_GROUP));
                final double lod   = Double.parseDouble(matcher.group(LOD_GROUP));
                final double[] equinox;
                final double[] nro;
                if (isNonRotatingOrigin) {
                    nro = new double[] {
                        Double.parseDouble(matcher.group(NUT_0_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS,
                        Double.parseDouble(matcher.group(NUT_1_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS
                    equinox = getConverter().toEquinox(date, nro[0], nro[1]);
                } else {
                    equinox = new double[] {
                        Double.parseDouble(matcher.group(NUT_0_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS,
                        Double.parseDouble(matcher.group(NUT_1_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS
                    nro = getConverter().toNonRotating(date, equinox[0], equinox[1]);

                return new EOPEntry(dc.getMJD(), dtu1, lod, x, y, Double.NaN, Double.NaN,
                                    equinox[0], equinox[1], nro[0], nro[1],
                                    getItrfVersion(), date);


        /** Parser for data lines with pole rates.
         * <p>
         * ITRF markers have either the following form:
         * </p>
         * <pre>
         * # EOP (IERS) 20 C04 TIME SERIES  consistent with ITRF 2020 - sampled at 0h UTC
         * </pre>
         * <p>
         * Header have either the following form:
         * </p>
         * <pre>
         * # YR  MM  DD  HH       MJD        x(")        y(")  UT1-UTC(s)       dX(")      dY(")       xrt(")      yrt(")      LOD(s)        x Er        y Er  UT1-UTC Er      dX Er       dY Er       xrt Er      yrt Er      LOD Er
         * </pre>
         * <p>
         * The data lines in the EOP C04 yearly data files have either the following fixed form:
         * </p>
         * <pre>
         * year month day hour MJD (in floating format) …16 floating values fields in decimal format...
         * 2015   1   1  12  57023.50    0.030148    0.281014   …
         * 2015   1   2  12  57024.50    0.029219    0.281441   …
         * 2015   1   3  12  57025.50    0.028777    0.281824   …
         * </pre>
         * @since 12.0
        private class LineWithRatesParser extends LineParser {

            /** Year group. */
            private static final int YEAR_GROUP = 1;

            /** Month group. */
            private static final int MONTH_GROUP = 2;

            /** Day group. */
            private static final int DAY_GROUP = 3;

            /** Hour group. */
            private static final int HOUR_GROUP = 4;

            /** MJD group. */
            private static final int MJD_GROUP = 5;

            /** X component of pole motion group. */
            private static final int POLE_X_GROUP = 6;

            /** Y component of pole motion group. */
            private static final int POLE_Y_GROUP = 7;

            /** UT1-UTC group. */
            private static final int UT1_UTC_GROUP = 8;

            /** Correction for nutation first field. */
            private static final int NUT_DX_GROUP = 9;

            /** Correction for nutation second field. */
            private static final int NUT_DY_GROUP = 10;

            /** X rate component of pole motion group.
             * @since 12.0
            private static final int POLE_X_RATE_GROUP = 11;

            /** Y rate component of pole motion group.
             * @since 12.0
            private static final int POLE_Y_RATE_GROUP = 12;

            /** LoD group. */
            private static final int LOD_GROUP = 13;

            /** Simple constructor.
             * @param name  of the stream for error messages.
            LineWithRatesParser(final String name) {
                super("^# +EOP +\\(IERS\\) +([0-9][0-9]) +C04.*",
                      "^# +YR +MM +DD +H +MJD +x\\(\"\\) +y\\(\"\\) +UT1-UTC\\(s\\) +dX\\(\"\\) +dY\\(\"\\) +xrt\\(\"\\) +yrt\\'\"\\) +.*",
                      "^(\\d+) +(\\d+) +(\\d+) +(\\d+) +(\\d+)\\.\\d+ +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+) +(-?\\d+\\.\\d+)(?: +(-?\\d+\\.\\d+)){8}$", // we intentionally ignore MJD fractional part

            /** {@inheritDoc} */
            protected void parseColumnsHeaderLine(final Matcher matcher) {
                // nothing to do here

            /** {@inheritDoc} */
            protected EOPEntry parseDataLine(final Matcher matcher, final DateComponents dc) {

                final TimeComponents tc = new TimeComponents(Integer.parseInt(matcher.group(HOUR_GROUP)), 0, 0.0);
                final AbsoluteDate date = new AbsoluteDate(dc, tc, getUtc());

                final double x     = Double.parseDouble(matcher.group(POLE_X_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS;
                final double y     = Double.parseDouble(matcher.group(POLE_Y_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS;
                final double xRate = Double.parseDouble(matcher.group(POLE_X_RATE_GROUP)) *
                                     Constants.ARC_SECONDS_TO_RADIANS / Constants.JULIAN_DAY;
                final double yRate = Double.parseDouble(matcher.group(POLE_Y_RATE_GROUP)) *
                                     Constants.ARC_SECONDS_TO_RADIANS / Constants.JULIAN_DAY;
                final double dtu1  = Double.parseDouble(matcher.group(UT1_UTC_GROUP));
                final double lod   = Double.parseDouble(matcher.group(LOD_GROUP));
                final double[] nro = new double[] {
                    Double.parseDouble(matcher.group(NUT_DX_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS,
                    Double.parseDouble(matcher.group(NUT_DY_GROUP)) * Constants.ARC_SECONDS_TO_RADIANS
                final double[] equinox = getConverter().toEquinox(date, nro[0], nro[1]);

                return new EOPEntry(dc.getMJD(), dtu1, lod, x, y, xRate, yRate,
                                    equinox[0], equinox[1], nro[0], nro[1],
                                    getItrfVersion(), date);


