/**
 * Copyright (C) 2006-2021 Talend Inc. - www.talend.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.talend.sdk.component.api.record;

import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.time.temporal.Temporal;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

/**
 * Description of the structure of a value: its {@link Type}, its entries when it is a record and its element
 * schema when it is an array, plus free-form string properties.
 */
public interface Schema {

    /**
     * @return the type of this schema.
     */
    Type getType();

    /**
     * @return the nested element schema for arrays.
     */
    Schema getElementSchema();

    /**
     * @return the entries for records.
     */
    List<Entry> getEntries();

    /**
     * Looks up an entry by its name.
     *
     * @param name the entry name to search for, compared with {@link Entry#getName()}.
     * @return the first entry of {@link #getEntries()} whose name equals {@code name}, or {@code null} when there
     * is no such entry or when {@link #getEntries()} returns {@code null}.
     */
    default Entry getEntry(final String name) {
        return Optional
                .ofNullable(this.getEntries()) //
                .orElse(Collections.emptyList()) //
                .stream() //
                .filter((Entry e) -> Objects.equals(e.getName(), name)) //
                .findFirst() //
                .orElse(null);
    }

    /**
     * @return the metadata props
     */
    Map<String, String> getProps();

    /**
     * @param property : property name.
     * @return the requested metadata prop
     */
    String getProp(String property);

    /**
     * Supported value types, each one bound to the Java classes accepted by {@link #isCompatible(Object)}.
     */
    enum Type {
        /** Accepts {@code Record} instances. */
        RECORD(new Class<?>[] { Record.class }),
        /** Accepts any {@link Collection}. */
        ARRAY(new Class<?>[] { Collection.class }),
        /** Accepts {@link String}. */
        STRING(new Class<?>[] { String.class }),
        /** Accepts {@code byte[]} and {@code Byte[]}. */
        BYTES(new Class<?>[] { byte[].class, Byte[].class }),
        /** Accepts {@link Integer}. */
        INT(new Class<?>[] { Integer.class }),
        /** Accepts {@link Long}. */
        LONG(new Class<?>[] { Long.class }),
        /** Accepts {@link Float}. */
        FLOAT(new Class<?>[] { Float.class }),
        /** Accepts {@link Double}. */
        DOUBLE(new Class<?>[] { Double.class }),
        /** Accepts {@link Boolean}. */
        BOOLEAN(new Class<?>[] { Boolean.class }),
        /** Accepts {@link Long}, {@link Date} and any {@link Temporal}. */
        DATETIME(new Class<?>[] { Long.class, Date.class, Temporal.class });

        /** All compatible Java classes. */
        private final Class<?>[] classes;

        Type(final Class<?>[] classes) {
            this.classes = classes;
        }

        /**
         * Check if input can be assigned to an entry of this type.
         *
         * @param input : object.
         * @return true if input is null or is an instance of one of the classes bound to this type.
         */
        public boolean isCompatible(final Object input) {
            if (input == null) {
                return true;
            }
            for (final Class<?> clazz : classes) {
                if (clazz.isInstance(input)) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * A named, typed member of a schema.
     */
    interface Entry {

        /**
         * @return The name of this entry.
         */
        String getName();

        /**
         * @return The raw name of this entry.
         */
        String getRawName();

        /**
         * @return the raw name of this entry if exists, else return name.
         */
        String getOriginalFieldName();

        /**
         * @return Type of the entry, this determines which other fields are populated.
         */
        Type getType();

        /**
         * @return Is this entry nullable or always valued.
         */
        boolean isNullable();

        /**
         * @param <T> the default value type.
         * @return Default value for this entry.
         */
        <T> T getDefaultValue();

        /**
         * @return For type == record, the element type.
         */
        Schema getElementSchema();

        /**
         * @return Allows to associate to this field a comment - for doc purposes, no use in the runtime.
         */
        String getComment();

        /**
         * @return the metadata props
         */
        Map<String, String> getProps();

        /**
         * @param property : property name.
         * @return the requested metadata prop
         */
        String getProp(String property);

        // Map<String, Object> metadata <-- DON'T DO THAT, ENSURE ANY META IS TYPED!

        /**
         * Plain builder matching {@link Entry} structure.
         */
        interface Builder {

            Builder withName(String name);

            Builder withRawName(String rawName);

            Builder withType(Type type);

            Builder withNullable(boolean nullable);

            <T> Builder withDefaultValue(T value);

            Builder withElementSchema(Schema schema);

            Builder withComment(String comment);

            Builder withProps(Map<String, String> props);

            Builder withProp(String key, String value);

            Entry build();

        }
    }

    /**
     * Allows to build a schema.
     */
    interface Builder {

        /**
         * @param type schema type.
         * @return this builder.
         */
        Builder withType(Type type);

        /**
         * @param entry element for either an array or record type.
         * @return this builder.
         */
        Builder withEntry(Entry entry);

        /**
         * @param schema nested element schema.
         * @return this builder.
         */
        Builder withElementSchema(Schema schema);

        /**
         * @param props schema properties
         * @return this builder
         */
        Builder withProps(Map<String, String> props);

        /**
         *
         * @param key the prop key name
         * @param value the prop value
         * @return this builder
         */
        Builder withProp(String key, String value);

        /**
         * @return the described schema.
         */
        Schema build();
    }

    /**
     * Sanitize name to be avro compatible.
     * <p>
     * The result only contains ASCII letters, ASCII digits and {@code '_'}:
     * <ul>
     * <li>The first character is kept when it is an ASCII letter or {@code '_'}. Otherwise it is dropped, unless
     * it is the only character or the second character is a digit, in which case it becomes {@code '_'}.</li>
     * <li>Every following ASCII character is kept when it is a letter or a digit, and replaced by {@code '_'}
     * otherwise.</li>
     * <li>Every following non-ASCII character becomes {@code '_'} when it is an upper or lower case letter;
     * otherwise it is replaced by the Base64 form of its UTF-8 bytes, in which every character that is not a
     * letter or digit is turned into {@code '_'}.</li>
     * </ul>
     * The UTF-8 encoding is explicit so that the result does not depend on the JVM default {@link Charset}.
     *
     * @param name : original name.
     * @return avro compatible name, or {@code name} itself when it is {@code null} or empty.
     */
    static String sanitizeConnectionName(final String name) {
        if (name == null || name.isEmpty()) {
            return name;
        }

        final CharsetEncoder ascii = StandardCharsets.US_ASCII.newEncoder();

        final char first = name.charAt(0);
        final boolean firstInvalid = !ascii.canEncode(first) || (!Character.isLetter(first) && first != '_');
        // An invalid leading character is simply dropped, except when doing so would leave nothing
        // or would expose a digit as the new first character.
        final boolean skipFirstChar = firstInvalid && name.length() > 1 && !Character.isDigit(name.charAt(1));

        final StringBuilder sanitizedBuilder = new StringBuilder(name.length());
        if (!skipFirstChar) {
            sanitizedBuilder.append(firstInvalid ? '_' : first);
        }

        for (int i = 1; i < name.length(); i++) {
            final char current = name.charAt(i);
            if (ascii.canEncode(current)) {
                sanitizedBuilder.append(Character.isLetterOrDigit(current) ? current : '_');
            } else if (Character.isLowerCase(current) || Character.isUpperCase(current)) {
                sanitizedBuilder.append('_');
            } else {
                // Explicit UTF-8: the no-arg getBytes() would make the generated name platform dependent.
                final String enc = Base64
                        .getEncoder()
                        .encodeToString(String.valueOf(current).getBytes(StandardCharsets.UTF_8));
                // The Base64 text may start with a digit, which must not open the sanitized name.
                if (sanitizedBuilder.length() == 0 && Character.isDigit(enc.charAt(0))) {
                    sanitizedBuilder.append('_');
                }
                for (int iter = 0; iter < enc.length(); iter++) {
                    final char encodedChar = enc.charAt(iter);
                    sanitizedBuilder.append(Character.isLetterOrDigit(encodedChar) ? encodedChar : '_');
                }
            }
        }
        return sanitizedBuilder.toString();
    }

}