Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor phone context parsing and phone number normalizing logic. #3790

Open
wants to merge 3 commits into
base: refactor-regex-consts-cpp
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -267,9 +267,11 @@ set (
"src/phonenumbers/base/strings/string_piece.cc"
"src/phonenumbers/default_logger.cc"
"src/phonenumbers/logger.cc"
"src/phonenumbers/phonecontextparser.cc"
"src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers.
"src/phonenumbers/phonenumber.cc"
"src/phonenumbers/phonenumber.pb.cc" # Generated by Protocol Buffers.
"src/phonenumbers/phonenumbernormalizer.cc"
"src/phonenumbers/phonenumberutil.cc"
"src/phonenumbers/regex_based_matcher.cc"
"src/phonenumbers/regexpsandmappings.cc"
@@ -428,7 +430,7 @@ include_directories ("src")
# Collate dependencies
#----------------------------------------------------------------

set (LIBRARY_DEPS ${ICU_LIB} ${PROTOBUF_LIB} absl::node_hash_set absl::strings absl::synchronization)
set (LIBRARY_DEPS ${ICU_LIB} ${PROTOBUF_LIB} absl::node_hash_set absl::statusor absl::strings absl::synchronization)

if (USE_BOOST)
list (APPEND LIBRARY_DEPS ${Boost_LIBRARIES})
2 changes: 1 addition & 1 deletion cpp/src/phonenumbers/asyoutypeformatter.cc
Original file line number Diff line number Diff line change
@@ -711,7 +711,7 @@ char AsYouTypeFormatter::NormalizeAndAccrueDigitsAndPlusSign(
} else {
string number;
UnicodeString(next_char).toUTF8String(number);
phone_util_.NormalizeDigitsOnly(&number);
phone_util_.phone_number_normalizer_->NormalizeDigitsOnly(&number);
accrued_input_without_formatting_.append(next_char);
national_number_.append(number);
normalized_char = number[0];
5 changes: 5 additions & 0 deletions cpp/src/phonenumbers/constants.h
Original file line number Diff line number Diff line change
@@ -21,7 +21,9 @@ namespace i18n {
namespace phonenumbers {

class Constants {
friend class PhoneContextParser;
friend class PhoneNumberMatcherRegExps;
friend class PhoneNumberNormalizer;
friend class PhoneNumberRegExpsAndMappings;
friend class PhoneNumberUtil;

@@ -33,6 +35,7 @@ class Constants {

static constexpr char kRfc3966ExtnPrefix[] = ";ext=";
static constexpr char kRfc3966VisualSeparator[] = "[\\-\\.\\(\\)]?";
static constexpr char kRfc3966PhoneContext[] = ";phone-context=";

static constexpr char kDigits[] = "\\p{Nd}";

@@ -53,6 +56,8 @@ class Constants {

// The minimum and maximum length of the national significant number.
static constexpr size_t kMinLengthForNsn = 2;
// The maximum length of the country calling code.
static constexpr size_t kMaxLengthCountryCode = 3;

static constexpr char kPlusChars[] = "+\xEF\xBC\x8B"; /* "++" */

2 changes: 2 additions & 0 deletions cpp/src/phonenumbers/normalize_utf8.h
Original file line number Diff line number Diff line change
@@ -14,6 +14,8 @@

#include <string>

#include <unicode/uchar.h>

#include "phonenumbers/utf/unicodetext.h"

namespace i18n {
125 changes: 125 additions & 0 deletions cpp/src/phonenumbers/phonecontextparser.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "phonenumbers/phonecontextparser.h"

#include <string>

#include "phonenumbers/constants.h"

namespace i18n {
namespace phonenumbers {

// Builds a parser from its three collaborators.
//
// country_calling_codes: the calling codes that isValidCountryCode() accepts;
//     ownership is transferred to the parser.
// reg_exps: provides the RFC3966 patterns consulted by isValid().
// normalizer: used by ParsePhoneContext() to reduce a context to its digits.
//
// Every argument is a by-value sink parameter, so each one is moved into its
// member. Moving (rather than copying) the shared_ptr arguments avoids a
// needless reference-count increment/decrement pair per argument.
PhoneContextParser::PhoneContextParser(
    std::unique_ptr<std::vector<int>> country_calling_codes,
    std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps,
    std::shared_ptr<PhoneNumberNormalizer> normalizer)
    : country_calling_codes_(std::move(country_calling_codes)),
      reg_exps_(std::move(reg_exps)),
      normalizer_(std::move(normalizer)) {}

// Returns the raw value of the ";phone-context=" parameter of `phone_number`.
//
// The result is std::nullopt when the parameter is absent, an empty view when
// the parameter is present but nothing follows the '=', and otherwise the text
// up to (but not including) the next ';' or the end of the input. A non-empty
// result is a view into `phone_number` and must not outlive that buffer.
std::optional<absl::string_view> PhoneContextParser::ExtractPhoneContext(
    const absl::string_view phone_number) {
  const absl::string_view prefix(Constants::kRfc3966PhoneContext);

  const size_t prefix_pos = phone_number.find(prefix);
  if (prefix_pos == absl::string_view::npos) {
    return std::nullopt;
  }

  const size_t value_begin = prefix_pos + prefix.size();
  if (value_begin >= phone_number.length()) {
    // Nothing follows the '=': the parameter is present but empty.
    return "";
  }

  // The value ends at the next parameter separator, or at the end of the
  // input when phone-context is the last parameter.
  const size_t value_end = phone_number.find(';', value_begin);
  return value_end == absl::string_view::npos
             ? phone_number.substr(value_begin)
             : phone_number.substr(value_begin, value_end - value_begin);
}

// Reports whether `phone_context` is an acceptable phone-context value: it
// must be non-empty and fully match either the global-number-digits pattern or
// the domain-name pattern held by reg_exps_.
bool PhoneContextParser::isValid(absl::string_view phone_context) {
  if (phone_context.empty()) return false;

  // FullMatch is called with a std::string, so materialize the view once and
  // hand the same string to both patterns.
  const std::string candidate(phone_context);
  if (reg_exps_->rfc3966_global_number_digits_pattern_->FullMatch(candidate)) {
    return true;
  }
  return reg_exps_->rfc3966_domainname_pattern_->FullMatch(candidate);
}

// Reports whether `country_code` is one of the calling codes handed to the
// parser at construction time (linear scan of country_calling_codes_).
bool PhoneContextParser::isValidCountryCode(int country_code) {
  for (const int known_code : *country_calling_codes_) {
    if (known_code == country_code) {
      return true;
    }
  }
  return false;
}

// Turns a phone-context value into a PhoneContext object.
//
// raw_context always receives `phone_context` verbatim. country_code is set
// only when the value starts with a plus sign and the digits that follow form
// a known country calling code of at most Constants::kMaxLengthCountryCode
// digits; in every other case it stays std::nullopt.
PhoneContextParser::PhoneContext PhoneContextParser::ParsePhoneContext(
    absl::string_view phone_context) {
  PhoneContextParser::PhoneContext phone_context_object;
  phone_context_object.raw_context = phone_context;
  phone_context_object.country_code = std::nullopt;

  // Ignore phone-context values that do not start with a plus sign; they could
  // be a domain name. The empty check also keeps substr(1) below in range.
  if (phone_context.empty() ||
      phone_context.at(0) != Constants::kPlusSign[0]) {
    return phone_context_object;
  }

  // Remove the plus sign from the phone context and normalize the digits.
  std::string normalized_phone_context = std::string(phone_context.substr(1));
  normalizer_->NormalizeDigitsOnly(&normalized_phone_context);

  // Anything longer than the maximum country-code length cannot be a bare
  // country code, and an empty string carries no code at all.
  if (normalized_phone_context.empty() ||
      normalized_phone_context.length() > Constants::kMaxLengthCountryCode) {
    return phone_context_object;
  }

  // NOTE(review): relies on NormalizeDigitsOnly leaving only ASCII decimal
  // digits, so std::stoi cannot throw here; the length cap above rules out
  // overflow.
  int potential_country_code = std::stoi(normalized_phone_context, nullptr, 10);
  if (!isValidCountryCode(potential_country_code)) {
    return phone_context_object;
  }

  phone_context_object.country_code = potential_country_code;
  return phone_context_object;
}

// Entry point: locates, validates and interprets the phone-context parameter
// of `phone_number`.
//
// Returns std::nullopt when the parameter is absent, an InvalidArgument status
// when it is present but fails isValid(), and otherwise the PhoneContext
// produced by ParsePhoneContext().
absl::StatusOr<std::optional<PhoneContextParser::PhoneContext>>
PhoneContextParser::Parse(absl::string_view phone_number) {
  const std::optional<absl::string_view> raw_context =
      ExtractPhoneContext(phone_number);

  // No phone-context parameter at all: not an error, just nothing to report.
  if (!raw_context) {
    return std::nullopt;
  }

  if (isValid(*raw_context)) {
    return ParsePhoneContext(*raw_context);
  }
  return absl::InvalidArgumentError("Phone context is invalid.");
}

} // namespace phonenumbers
} // namespace i18n
81 changes: 81 additions & 0 deletions cpp/src/phonenumbers/phonecontextparser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_
#define I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_

#include <memory>
#include <optional>
#include <vector>
#include <string>

#include "absl/status/statusor.h"
#include "phonenumbers/phonenumbernormalizer.h"
#include "phonenumbers/regexpsandmappings.h"

namespace i18n {
namespace phonenumbers {

// Extracts and interprets the ";phone-context=" parameter of a phone number in
// RFC3966 format. Every member is private; PhoneNumberUtil and
// PhoneContextParserTest get access through friendship.
class PhoneContextParser {
  friend class PhoneNumberUtil;
  friend class PhoneContextParserTest;

 private:
  // Outcome of parsing a phone-context value.
  struct PhoneContext {
    // The phone-context value exactly as it appeared in the input.
    std::string raw_context;

    // Populated only when the phone-context is exactly and only a + followed
    // by a valid country code; std::nullopt otherwise.
    std::optional<int> country_code;
  };

  // Takes ownership of the list of country calling codes and shares the
  // regular-expression holder and the normalizer with the caller.
  PhoneContextParser(std::unique_ptr<std::vector<int>> country_calling_codes,
                     std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps,
                     std::shared_ptr<PhoneNumberNormalizer> normalizer);

  // Parses the phone-context parameter of `phone_number`:
  //   - parameter absent            -> std::nullopt
  //   - parameter present, invalid  -> error status
  //   - parameter present and valid -> PhoneContext holding the raw value and,
  //     if the value is exactly and only a + followed by a valid country code,
  //     that country code as well.
  absl::StatusOr<std::optional<PhoneContextParser::PhoneContext>> Parse(
      absl::string_view phone_number);

  // Pulls the value of the phone-context parameter out of `phone_number`,
  // following the specification of RFC3966. Returns std::nullopt when the
  // parameter is not present.
  static std::optional<absl::string_view> ExtractPhoneContext(
      absl::string_view phone_number);

  // Tells whether `phone_context` follows the specification of RFC3966, i.e.
  // is non-empty and matches the global-number-digits or domain-name pattern.
  bool isValid(absl::string_view phone_context);

  // Tells whether `country_code` appears in country_calling_codes_.
  bool isValidCountryCode(int country_code);

  // Converts an already extracted phone-context value into a PhoneContext.
  PhoneContext ParsePhoneContext(absl::string_view phone_context);

  // Calling codes that isValidCountryCode() accepts.
  std::unique_ptr<std::vector<int>> country_calling_codes_;
  // Source of the RFC3966 patterns that isValid() matches against.
  std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
  // Used by ParsePhoneContext() to normalize the digits of the context.
  std::shared_ptr<PhoneNumberNormalizer> normalizer_;
};

} // namespace phonenumbers
} // namespace i18n

#endif // I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_
Loading