001/** 002 * Copyright (C) 2006-2021 Talend Inc. - www.talend.com 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.talend.sdk.component.api.record; 017 018import java.io.StringReader; 019import java.nio.charset.Charset; 020import java.nio.charset.CharsetEncoder; 021import java.nio.charset.StandardCharsets; 022import java.time.temporal.Temporal; 023import java.util.Base64; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.Date; 027import java.util.List; 028import java.util.Map; 029import java.util.Objects; 030import java.util.Optional; 031import java.util.stream.Stream; 032 033import javax.json.Json; 034import javax.json.JsonValue; 035 036public interface Schema { 037 038 /** 039 * @return the type of this schema. 040 */ 041 Type getType(); 042 043 /** 044 * @return the nested element schema for arrays. 045 */ 046 Schema getElementSchema(); 047 048 /** 049 * @return the data entries for records (not contains meta data entries). 050 */ 051 List<Entry> getEntries(); 052 053 /** 054 * @return the metadata entries for records (not contains ordinary data entries). 055 */ 056 List<Entry> getMetadata(); 057 058 /** 059 * @return All entries, including data and metadata, of this schema. 060 */ 061 Stream<Entry> getAllEntries(); 062 063 default Entry getEntry(final String name) { 064 return Optional 065 .ofNullable(getEntries()) // 066 .orElse(Collections.emptyList()) // 067 .stream() // 068 .filter((Entry e) -> Objects.equals(e.getName(), name)) // 069 .findFirst() // 070 .orElse(null); 071 } 072 073 /** 074 * @return the metadata props 075 */ 076 Map<String, String> getProps(); 077 078 /** 079 * @param property : property name. 080 * @return the requested metadata prop 081 */ 082 String getProp(String property); 083 084 /** 085 * Get a property values from schema with its name. 086 * 087 * @param name : property's name. 088 * @return property's value. 089 */ 090 default JsonValue getJsonProp(final String name) { 091 final String prop = this.getProp(name); 092 if (prop == null) { 093 return null; 094 } 095 try { 096 return Json.createParser(new StringReader(prop)).getValue(); 097 } catch (RuntimeException ex) { 098 return Json.createValue(prop); 099 } 100 } 101 102 enum Type { 103 RECORD(new Class<?>[] { Record.class }), 104 ARRAY(new Class<?>[] { Collection.class }), 105 STRING(new Class<?>[] { String.class }), 106 BYTES(new Class<?>[] { byte[].class, Byte[].class }), 107 INT(new Class<?>[] { Integer.class }), 108 LONG(new Class<?>[] { Long.class }), 109 FLOAT(new Class<?>[] { Float.class }), 110 DOUBLE(new Class<?>[] { Double.class }), 111 BOOLEAN(new Class<?>[] { Boolean.class }), 112 DATETIME(new Class<?>[] { Long.class, Date.class, Temporal.class }); 113 114 /** All compatibles Java classes */ 115 private final Class<?>[] classes; 116 117 Type(final Class<?>[] classes) { 118 this.classes = classes; 119 } 120 121 /** 122 * Check if input can be affected to an entry of this type. 123 * 124 * @param input : object. 125 * @return true if input is null or ok. 126 */ 127 public boolean isCompatible(final Object input) { 128 if (input == null) { 129 return true; 130 } 131 for (final Class<?> clazz : classes) { 132 if (clazz.isInstance(input)) { 133 return true; 134 } 135 } 136 return false; 137 } 138 } 139 140 interface Entry { 141 142 /** 143 * @return The name of this entry. 144 */ 145 String getName(); 146 147 /** 148 * @return The raw name of this entry. 149 */ 150 String getRawName(); 151 152 /** 153 * @return the raw name of this entry if exists, else return name. 154 */ 155 String getOriginalFieldName(); 156 157 /** 158 * @return Type of the entry, this determine which other fields are populated. 159 */ 160 Type getType(); 161 162 /** 163 * @return Is this entry nullable or always valued. 164 */ 165 boolean isNullable(); 166 167 /** 168 * @return true if this entry is for metadata; false for ordinary data. 169 */ 170 boolean isMetadata(); 171 172 /** 173 * @param <T> the default value type. 174 * @return Default value for this entry. 175 */ 176 <T> T getDefaultValue(); 177 178 /** 179 * @return For type == record, the element type. 180 */ 181 Schema getElementSchema(); 182 183 /** 184 * @return Allows to associate to this field a comment - for doc purposes, no use in the runtime. 185 */ 186 String getComment(); 187 188 /** 189 * @return the metadata props 190 */ 191 Map<String, String> getProps(); 192 193 /** 194 * @param property : property name. 195 * @return the requested metadata prop 196 */ 197 String getProp(String property); 198 199 /** 200 * Get a property values from entry with its name. 201 * 202 * @param name : property's name. 203 * @return property's value. 204 */ 205 default JsonValue getJsonProp(final String name) { 206 final String prop = this.getProp(name); 207 if (prop == null) { 208 return null; 209 } 210 try { 211 return Json.createParser(new StringReader(prop)).getValue(); 212 } catch (RuntimeException ex) { 213 return Json.createValue(prop); 214 } 215 } 216 217 // Map<String, Object> metadata <-- DON'T DO THAT, ENSURE ANY META IS TYPED! 218 219 /** 220 * Plain builder matching {@link Entry} structure. 221 */ 222 interface Builder { 223 224 Builder withName(String name); 225 226 Builder withRawName(String rawName); 227 228 Builder withType(Type type); 229 230 Builder withNullable(boolean nullable); 231 232 Builder withMetadata(boolean metadata); 233 234 <T> Builder withDefaultValue(T value); 235 236 Builder withElementSchema(Schema schema); 237 238 Builder withComment(String comment); 239 240 Builder withProps(Map<String, String> props); 241 242 Builder withProp(String key, String value); 243 244 Entry build(); 245 246 } 247 } 248 249 /** 250 * Allows to build a schema. 251 */ 252 interface Builder { 253 254 /** 255 * @param type schema type. 256 * @return this builder. 257 */ 258 Builder withType(Type type); 259 260 /** 261 * @param entry element for either an array or record type. 262 * @return this builder. 263 */ 264 Builder withEntry(Entry entry); 265 266 /** 267 * @param schema nested element schema. 268 * @return this builder. 269 */ 270 Builder withElementSchema(Schema schema); 271 272 /** 273 * @param props schema properties 274 * @return this builder 275 */ 276 Builder withProps(Map<String, String> props); 277 278 /** 279 * 280 * @param key the prop key name 281 * @param value the prop value 282 * @return this builder 283 */ 284 Builder withProp(String key, String value); 285 286 /** 287 * @return the described schema. 288 */ 289 Schema build(); 290 } 291 292 /** 293 * Sanitize name to be avro compatible. 294 * 295 * @param name : original name. 296 * @return avro compatible name. 297 */ 298 static String sanitizeConnectionName(final String name) { 299 if (name == null || name.isEmpty()) { 300 return name; 301 } 302 303 char current = name.charAt(0); 304 final CharsetEncoder ascii = Charset.forName(StandardCharsets.US_ASCII.name()).newEncoder(); 305 final boolean skipFirstChar = ((!ascii.canEncode(current)) || (!Character.isLetter(current) && current != '_')) 306 && name.length() > 1 && (!Character.isDigit(name.charAt(1))); 307 308 final StringBuilder sanitizedBuilder = new StringBuilder(); 309 310 if (!skipFirstChar) { 311 if (((!Character.isLetter(current)) && current != '_') || (!ascii.canEncode(current))) { 312 sanitizedBuilder.append('_'); 313 } else { 314 sanitizedBuilder.append(current); 315 } 316 } 317 for (int i = 1; i < name.length(); i++) { 318 current = name.charAt(i); 319 if (!ascii.canEncode(current)) { 320 if (Character.isLowerCase(current) || Character.isUpperCase(current)) { 321 sanitizedBuilder.append('_'); 322 } else { 323 final byte[] encoded = 324 Base64.getEncoder().encode(name.substring(i, i + 1).getBytes(StandardCharsets.UTF_8)); 325 final String enc = new String(encoded); 326 if (sanitizedBuilder.length() == 0 && Character.isDigit(enc.charAt(0))) { 327 sanitizedBuilder.append('_'); 328 } 329 for (int iter = 0; iter < enc.length(); iter++) { 330 if (Character.isLetterOrDigit(enc.charAt(iter))) { 331 sanitizedBuilder.append(enc.charAt(iter)); 332 } else { 333 sanitizedBuilder.append('_'); 334 } 335 } 336 } 337 } else if (Character.isLetterOrDigit(current)) { 338 sanitizedBuilder.append(current); 339 } else { 340 sanitizedBuilder.append('_'); 341 } 342 343 } 344 return sanitizedBuilder.toString(); 345 } 346 347}